From ff6717b413858730f282d57d9f22d08381ce86f0 Mon Sep 17 00:00:00 2001 From: Mark Wolfe Date: Thu, 6 Jul 2023 00:48:40 +1000 Subject: [PATCH] GH-36319: [Go][Parquet] Improved row group writer error messages (#36320) ### Rationale for this change ### What changes are included in this PR? Updated error messages for mismatched column row counts to help identify the column, or rowgroup index which failed the check, and how many rows were expected vs found. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * Closes: #36319 Authored-by: Mark Wolfe Signed-off-by: Matt Topol --- go/parquet/file/file_writer_test.go | 8 ++++++-- go/parquet/file/row_group_writer.go | 8 ++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/go/parquet/file/file_writer_test.go b/go/parquet/file/file_writer_test.go index 8ff202b947bf0..bba0d2be28d98 100644 --- a/go/parquet/file/file_writer_test.go +++ b/go/parquet/file/file_writer_test.go @@ -139,7 +139,9 @@ func (t *SerializeTestSuite) unequalNumRows(maxRows int64, rowsPerCol []int64) { t.WriteBatchSubset(int(rowsPerCol[col]), 0, cw, t.DefLevels[:rowsPerCol[col]], nil) cw.Close() } - t.Error(rgw.Close()) + err := rgw.Close() + t.Error(err) + t.ErrorContains(err, "row mismatch for unbuffered row group") } func (t *SerializeTestSuite) unequalNumRowsBuffered(maxRows int64, rowsPerCol []int64) { @@ -154,7 +156,9 @@ func (t *SerializeTestSuite) unequalNumRowsBuffered(maxRows int64, rowsPerCol [] t.WriteBatchSubset(int(rowsPerCol[col]), 0, cw, t.DefLevels[:rowsPerCol[col]], nil) cw.Close() } - t.Error(rgw.Close()) + err := rgw.Close() + t.Error(err) + t.ErrorContains(err, "row mismatch for buffered row group") } func (t *SerializeTestSuite) TestZeroRows() { diff --git a/go/parquet/file/row_group_writer.go b/go/parquet/file/row_group_writer.go index 8fbb04ad7afb6..410f48b477a7c 100644 --- a/go/parquet/file/row_group_writer.go +++ b/go/parquet/file/row_group_writer.go @@ -110,13 +110,13 @@ func (rg *rowGroupWriter) checkRowsWritten() error { if rg.nrows == 0 { rg.nrows = current } else if rg.nrows != current { - return xerrors.New("row mismatch") + return xerrors.Errorf("row mismatch for unbuffered row group: %d, count expected: %d, actual: %d", rg.ordinal, current, rg.nrows) } } else if rg.buffered { current := rg.columnWriters[0].RowsWritten() - for _, wr := range rg.columnWriters[1:] { + for i, wr := range rg.columnWriters[1:] { if current != wr.RowsWritten() { - return xerrors.New("row mismatch error") + return xerrors.Errorf("row mismatch for buffered row group: %d, column: %d, count expected: %d, actual: %d", rg.ordinal, i+1, current, wr.RowsWritten()) } } rg.nrows = current @@ -182,7 +182,7 @@ func (rg *rowGroupWriter) Column(i int) (ColumnChunkWriter, error) { if i >= 0 && i < len(rg.columnWriters) { return rg.columnWriters[i], nil } - return nil, xerrors.New("invalid column number requested") + return nil, xerrors.Errorf("invalid column number requested: %d", i) } func (rg *rowGroupWriter) CurrentColumn() int { return rg.metadata.CurrentColumn() }