Skip to content

Commit

Permalink
apacheGH-36319: [Go][Parquet] Improved row group writer error messages (apache#36320)
Browse files Browse the repository at this point in the history

### Rationale for this change

### What changes are included in this PR?

Updated the error messages for mismatched column row counts so that they identify the column or row group index that failed the check, and report how many rows were expected versus found.

### Are these changes tested?

Yes.

### Are there any user-facing changes?

No.

* Closes: apache#36319

Authored-by: Mark Wolfe <[email protected]>
Signed-off-by: Matt Topol <[email protected]>
  • Loading branch information
wolfeidau authored Jul 5, 2023
1 parent 6521489 commit ff6717b
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 6 deletions.
8 changes: 6 additions & 2 deletions go/parquet/file/file_writer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,9 @@ func (t *SerializeTestSuite) unequalNumRows(maxRows int64, rowsPerCol []int64) {
t.WriteBatchSubset(int(rowsPerCol[col]), 0, cw, t.DefLevels[:rowsPerCol[col]], nil)
cw.Close()
}
t.Error(rgw.Close())
err := rgw.Close()
t.Error(err)
t.ErrorContains(err, "row mismatch for unbuffered row group")
}

func (t *SerializeTestSuite) unequalNumRowsBuffered(maxRows int64, rowsPerCol []int64) {
Expand All @@ -154,7 +156,9 @@ func (t *SerializeTestSuite) unequalNumRowsBuffered(maxRows int64, rowsPerCol []
t.WriteBatchSubset(int(rowsPerCol[col]), 0, cw, t.DefLevels[:rowsPerCol[col]], nil)
cw.Close()
}
t.Error(rgw.Close())
err := rgw.Close()
t.Error(err)
t.ErrorContains(err, "row mismatch for buffered row group")
}

func (t *SerializeTestSuite) TestZeroRows() {
Expand Down
8 changes: 4 additions & 4 deletions go/parquet/file/row_group_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,13 @@ func (rg *rowGroupWriter) checkRowsWritten() error {
if rg.nrows == 0 {
rg.nrows = current
} else if rg.nrows != current {
return xerrors.New("row mismatch")
return xerrors.Errorf("row mismatch for unbuffered row group: %d, count expected: %d, actual: %d", rg.ordinal, current, rg.nrows)
}
} else if rg.buffered {
current := rg.columnWriters[0].RowsWritten()
for _, wr := range rg.columnWriters[1:] {
for i, wr := range rg.columnWriters[1:] {
if current != wr.RowsWritten() {
return xerrors.New("row mismatch error")
return xerrors.Errorf("row mismatch for buffered row group: %d, column: %d, count expected: %d, actual: %d", rg.ordinal, i+1, current, wr.RowsWritten())
}
}
rg.nrows = current
Expand Down Expand Up @@ -182,7 +182,7 @@ func (rg *rowGroupWriter) Column(i int) (ColumnChunkWriter, error) {
if i >= 0 && i < len(rg.columnWriters) {
return rg.columnWriters[i], nil
}
return nil, xerrors.New("invalid column number requested")
return nil, xerrors.Errorf("invalid column number requested: %d", i)
}

// CurrentColumn reports the current column as tracked by the row group's
// metadata; it delegates directly to rg.metadata.CurrentColumn.
func (rg *rowGroupWriter) CurrentColumn() int {
	current := rg.metadata.CurrentColumn()
	return current
}
Expand Down

0 comments on commit ff6717b

Please sign in to comment.