Skip to content

Commit

Permalink
GH-38751: [C++][Go][Parquet] Add tests for reading Float16 files in parquet-testing (#38753)
Browse files Browse the repository at this point in the history

### Rationale for this change

Validates compatibility between implementations when reading `Float16` columns.

### What changes are included in this PR?

- Bumps `parquet-testing` commit to latest to use the recently-added files
- Adds reader tests for C++ and Go in the same vein as apache/arrow-rs#5003

### Are these changes tested?

Yes

### Are there any user-facing changes?

No

* Closes: #38751

Authored-by: benibus <[email protected]>
Signed-off-by: Matt Topol <[email protected]>
  • Loading branch information
benibus authored and kou committed Aug 30, 2024
1 parent 2ff298b commit bfe8b2a
Showing 1 changed file with 67 additions and 0 deletions.
67 changes: 67 additions & 0 deletions parquet/pqarrow/file_reader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/apache/arrow/go/v15/arrow"
"github.com/apache/arrow/go/v15/arrow/array"
"github.com/apache/arrow/go/v15/arrow/decimal128"
"github.com/apache/arrow/go/v15/arrow/float16"
"github.com/apache/arrow/go/v15/arrow/memory"
"github.com/apache/arrow/go/v15/parquet"
"github.com/apache/arrow/go/v15/parquet/file"
Expand Down Expand Up @@ -100,6 +101,72 @@ func TestArrowReaderAdHocReadDecimals(t *testing.T) {
}
}

// TestArrowReaderAdHocReadFloat16s reads each listed Float16 sample file from
// the test data directory into an Arrow table and checks that it contains a
// single Float16 column, delivered as one chunk, whose first slot is null and
// whose remaining slots match the expected values.
func TestArrowReaderAdHocReadFloat16s(t *testing.T) {
	// Each case names a sample file (without the ".parquet" suffix), the
	// expected row count including the leading null, and the expected values
	// for the rows that follow that null.
	tests := []struct {
		file string
		len  int
		vals []float16.Num
	}{
		{"float16_nonzeros_and_nans", 8,
			[]float16.Num{
				float16.New(1.0),
				float16.New(-2.0),
				float16.NaN(),
				float16.New(0.0),
				float16.New(-1.0),
				float16.New(0.0).Negate(),
				float16.New(2.0),
			}},
		{"float16_zeros_and_nans", 3,
			[]float16.Num{
				float16.New(0.0),
				float16.NaN(),
			}},
	}

	dataDir := getDataDir()
	for _, tt := range tests {
		t.Run(tt.file, func(t *testing.T) {
			mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
			defer mem.AssertSize(t, 0)

			filename := filepath.Join(dataDir, tt.file+".parquet")
			require.FileExists(t, filename)

			rdr, err := file.OpenParquetFile(filename, false, file.WithReadProps(parquet.NewReaderProperties(mem)))
			require.NoError(t, err)
			defer rdr.Close()

			arrowRdr, err := pqarrow.NewFileReader(rdr, pqarrow.ArrowReadProperties{}, mem)
			require.NoError(t, err)

			tbl, err := arrowRdr.ReadTable(context.Background())
			require.NoError(t, err)
			defer tbl.Release()

			// The checks below are preconditions for the indexing and the
			// type assertion that follow, so they stop the subtest on
			// failure instead of letting it run on into a panic.
			require.EqualValues(t, 1, tbl.NumCols())

			wantType := &arrow.Float16Type{}
			gotType := tbl.Schema().Field(0).Type
			require.Truef(t, arrow.TypeEqual(gotType, wantType), "expected: %s\ngot: %s", wantType, gotType)

			valCol := tbl.Column(0)
			require.EqualValues(t, tt.len, valCol.Len())
			require.Len(t, valCol.Data().Chunks(), 1)

			chunk := valCol.Data().Chunk(0).(*array.Float16)
			assert.True(t, chunk.IsNull(0))
			// Row 0 is the null checked above, so tt.vals[i] lines up with row i+1.
			for i := 0; i < tt.len-1; i++ {
				expected := tt.vals[i]
				actual := chunk.Value(i + 1)
				if expected.IsNaN() {
					// NaN representations aren't guaranteed to be exact on a binary level
					assert.Truef(t, actual.IsNaN(), "row %d: expected NaN", i+1)
				} else {
					assert.Equalf(t, expected.Uint16(), actual.Uint16(), "row %d", i+1)
				}
			}
		})
	}
}

func TestRecordReaderParallel(t *testing.T) {
mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
defer mem.AssertSize(t, 0)
Expand Down

0 comments on commit bfe8b2a

Please sign in to comment.