Skip to content

Commit

Permalink
add quotedValuesAreStrings option
Browse files Browse the repository at this point in the history
  • Loading branch information
loicalleyne committed Oct 30, 2024
1 parent 205e54d commit bb70631
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 8 deletions.
22 changes: 15 additions & 7 deletions bodkin.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,14 @@ type (

// Bodkin is a collection of field paths describing the columns of one or more structured inputs.
type Bodkin struct {
original *fieldPos
old *fieldPos
new *fieldPos
inferTimeUnits bool
typeConversion bool
err error
changes error
original *fieldPos
old *fieldPos
new *fieldPos
inferTimeUnits bool
quotedValuesAreStrings bool
typeConversion bool
err error
changes error
}

// NewBodkin returns a new Bodkin value from a structured input.
Expand Down Expand Up @@ -106,6 +107,13 @@ func WithTypeConversion() Option {
}
}

// WithQuotedValuesAreStrings returns an Option that sets the
// quotedValuesAreStrings flag. With the flag set, string values that look like
// booleans ("true"/"false"), integers or floats are kept as Arrow strings
// instead of being inferred as Boolean, Int64 or Float64.
func WithQuotedValuesAreStrings() Option {
	return func(cfg config) {
		cfg.quotedValuesAreStrings = true
	}
}

// Unify merges the structured input's column definitions with the schema of the previous input(s).
// Any unpopulated fields, empty objects or empty slices in JSON input are skipped.
func (u *Bodkin) Unify(a any) {
Expand Down
3 changes: 2 additions & 1 deletion cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ var jsonS1 string = `{
"results": [{"id":7594}],
"arrayscalar":[],
"datefield":"1979-01-01",
"timefield":"01:02:03"
"timefield":"01:02:03",
"boolquotedfield":"true"
}`

var jsonS2 string = `{
Expand Down
22 changes: 22 additions & 0 deletions schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,19 @@ var UpgradableTypes []arrow.Type = []arrow.Type{arrow.INT8,
arrow.STRING,
}

// Package-level matchers consulted when inferring a column type from a
// string value. All of them start out nil and are filled in at package init.
var (
	// NOTE(review): presumably populated by registerTsMatchers — confirm.
	timestampMatchers []*regexp.Regexp

	// NOTE(review): presumably compiled alongside the timestamp patterns — confirm.
	dateMatcher, timeMatcher *regexp.Regexp

	// Compiled by registerQuotedStringValueMatchers.
	integerMatcher, floatMatcher *regexp.Regexp

	// boolMatcher is a list of literal strings compared for equality, not a
	// regular expression.
	boolMatcher []string
)

// init fills in the package-level matchers used for type inference by calling
// the two register functions once, when the package is loaded.
func init() {
// Timestamp patterns first; the call order is kept as in the original.
registerTsMatchers()
// Integer, float and boolean matchers for quoted string values.
registerQuotedStringValueMatchers()
}

func registerTsMatchers() {
Expand All @@ -64,6 +69,12 @@ func registerTsMatchers() {
regexp.MustCompile(`^\d{4}-\d{1,2}-\d{1,2}[T ]\d{1,2}:\d{1,2}:\d{1,2}(\.\d{1,6})? *(([+-]\d{1,2}(:\d{1,2})?)|Z|UTC)?$`))
}

// registerQuotedStringValueMatchers initialises the matchers used to recognise
// integers, floats and booleans inside quoted string values.
//
// The function is idempotent: boolMatcher is assigned rather than appended to,
// so calling it more than once does not accumulate duplicate entries.
func registerQuotedStringValueMatchers() {
	// Optional sign followed by one or more digits.
	integerMatcher = regexp.MustCompile(`^[-+]?\d+$`)
	// Optional sign, digits with an optional fractional part (or a bare
	// fraction such as ".5"), and an optional exponent.
	floatMatcher = regexp.MustCompile(`^[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?$`)
	// Boolean literals are compared for exact equality by the caller.
	boolMatcher = []string{"true", "false"}
}

func newFieldPos(b *Bodkin) *fieldPos {
f := new(fieldPos)
f.owner = b
Expand Down Expand Up @@ -352,6 +363,17 @@ func goType2Arrow(f *fieldPos, gt any) arrow.DataType {
return arrow.FixedWidthTypes.Time64ns
}
}
if !f.owner.quotedValuesAreStrings {
if slices.Contains(boolMatcher, t) {
return arrow.FixedWidthTypes.Boolean
}
if integerMatcher.MatchString(t) {
return arrow.PrimitiveTypes.Int64
}
if floatMatcher.MatchString(t) {
return arrow.PrimitiveTypes.Float64
}
}
dt = arrow.BinaryTypes.String
case []byte:
dt = arrow.BinaryTypes.Binary
Expand Down

0 comments on commit bb70631

Please sign in to comment.