Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
e3107ba
add dockey
FGasper Aug 11, 2025
0ac82bc
no base64
FGasper Aug 11, 2025
828f072
refactor a bit
FGasper Aug 12, 2025
bb0ac8b
fix agg
FGasper Aug 12, 2025
b965033
bring in mongosync
FGasper Aug 13, 2025
2d6ca57
add error check
FGasper Aug 13, 2025
b137e99
precreate
FGasper Aug 13, 2025
1f25562
try older versions
FGasper Aug 13, 2025
fc6b8c6
back-compat
FGasper Aug 13, 2025
c3121d4
maybe fix 5
FGasper Aug 13, 2025
32563c9
avoid pre-v6 wonkiness
FGasper Aug 13, 2025
8b6d91b
comment
FGasper Aug 13, 2025
5460ac8
rename DB … ??
FGasper Aug 13, 2025
e23adab
move test
FGasper Aug 14, 2025
c9b78f5
more
FGasper Aug 14, 2025
34c63cf
fix test
FGasper Aug 15, 2025
9d90009
Use the simpler logic that routing actually uses.
FGasper Aug 15, 2025
a7c0de9
rollback
FGasper Aug 19, 2025
0ed6ddd
revert
FGasper Aug 19, 2025
578310c
pare back
FGasper Aug 19, 2025
f28c72f
“true” doc key
FGasper Aug 19, 2025
0300b8b
fix DB name
FGasper Aug 19, 2025
7c12d88
fix test
FGasper Aug 19, 2025
ac2b99a
compat with pre-v6
FGasper Aug 19, 2025
7bcddaa
only sharded for now
FGasper Aug 19, 2025
9f01a5f
all CI
FGasper Aug 19, 2025
54d347a
Update internal/verifier/dockey_agg_test.go
FGasper Aug 19, 2025
877a9cc
refactor & add more tests
FGasper Aug 19, 2025
c0ee5c4
Merge branch 'REP-6465-fix-dotted-shard-key' of github.com:FGasper/mi…
FGasper Aug 19, 2025
5c01288
comment
FGasper Aug 19, 2025
035fd76
check reverse
FGasper Aug 20, 2025
ed0aa0d
panic & assert in tests
FGasper Aug 20, 2025
b86061e
no error check
FGasper Aug 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions dockey/agg.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// Package dockey contains logic related to document key determination.
// Its tests that need a cluster are stored in internal/verifier.
package dockey

import (
"maps"
"slices"
"strconv"

"github.com/samber/lo"
"go.mongodb.org/mongo-driver/bson"
)

// ExtractTrueDocKeyAgg builds an aggregation expression that evaluates to the
// document key of the document that `docExpr` refers to. `fieldNames` gives
// the document key’s field names; each one is appended to `docExpr` (after a
// dot) to form the expression for that field’s value.
//
// This panics if `fieldNames` contains a duplicate.
//
// NB: This avoids the problem documented in SERVER-109340; as a result,
// the returned key may not always match the change stream’s `documentKey`
// (because the server misreports its own sharding logic).
func ExtractTrueDocKeyAgg(fieldNames []string, docExpr string) bson.D {
	assertFieldNameUniqueness(fieldNames)

	// Aggregation forbids direct creation of an object with dotted keys.
	// To work around that, build an object keyed on each field’s position
	// in fieldNames, and remember which position stands for which field so
	// that the positions can be swapped for the real names afterward.
	var indexedValues bson.D
	indexToField := map[string]string{}

	for idx, fieldName := range fieldNames {
		indexKey := strconv.Itoa(idx)

		indexedValues = append(indexedValues, bson.E{
			Key:   indexKey,
			Value: docExpr + "." + fieldName,
		})
		indexToField[indexKey] = fieldName
	}

	// Replace the positional keys with the real (possibly dotted) names.
	return mapObjectKeysAgg(indexedValues, indexToField)
}

// mapObjectKeysAgg returns an aggregation expression that evaluates `expr`,
// converts the result to key/value pairs via $objectToArray, renames each key
// according to `mapping` (current key => new key), and reassembles the pairs
// via $arrayToObject.
//
// The renaming is a $switch that has one branch per `mapping` entry and no
// default branch, so `mapping` needs an entry for every key that `expr`
// produces.
//
// Potentially reusable.
func mapObjectKeysAgg(expr any, mapping map[string]string) bson.D {
	// We would ideally pass mapping into the aggregation and $getField
	// to get the mapped key, but pre-v8 server versions required $getField’s
	// field parameter to be a constant. (And pre-v5 didn’t have $getField
	// at all.) So we use a $switch instead.
	//
	// Go randomizes map iteration order, so sort the keys: that way the
	// same mapping always yields the same expression.
	branches := lo.Map(
		slices.Sorted(maps.Keys(mapping)),
		func(key string, _ int) bson.D {
			return bson.D{
				{Key: "case", Value: bson.D{
					{Key: "$eq", Value: bson.A{
						key,
						"$$numericKey",
					}},
				}},
				{Key: "then", Value: mapping[key]},
			}
		},
	)

	mapAgg := bson.D{
		{Key: "$switch", Value: bson.D{
			{Key: "branches", Value: branches},
		}},
	}

	return bson.D{
		{Key: "$arrayToObject", Value: bson.D{
			{Key: "$map", Value: bson.D{
				{Key: "input", Value: bson.D{
					{Key: "$objectToArray", Value: expr},
				}},
				{Key: "in", Value: bson.D{
					// $let names the pair’s key so that the $switch
					// branches above can refer to it as $$numericKey.
					{Key: "$let", Value: bson.D{
						{Key: "vars", Value: bson.D{
							{Key: "numericKey", Value: "$$this.k"},
							{Key: "value", Value: "$$this.v"},
						}},
						{Key: "in", Value: bson.D{
							{Key: "k", Value: mapAgg},
							{Key: "v", Value: "$$value"},
						}},
					}},
				}},
			}},
		}},
	}
}
20 changes: 20 additions & 0 deletions dockey/agg_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package dockey

import (
"testing"

"github.com/stretchr/testify/assert"
)

func TestAggPanic(t *testing.T) {
assert.Panics(
t,
func() {
ExtractTrueDocKeyAgg(
[]string{"foo", "bar", "foo"},
"$$ROOT",
)
},
"duplicate field name should cause panic",
)
}
55 changes: 55 additions & 0 deletions dockey/raw.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package dockey

import (
"fmt"
"strings"

"github.com/pkg/errors"
"github.com/samber/lo"
"go.mongodb.org/mongo-driver/bson"
"go.mongodb.org/mongo-driver/x/bsonx/bsoncore"
)

// ExtractTrueDocKeyFromDoc extracts the document key from a document, given
// the document key’s field names. The returned key contains, in `fieldNames`
// order, each field for which the document has a value; fields that the
// document lacks are omitted.
//
// This returns an error if a field lookup fails for any reason other than
// the field’s absence, or if the assembled key cannot be marshaled.
// It panics if `fieldNames` contains a duplicate.
//
// NB: This avoids the problem documented in SERVER-109340; as a result,
// the returned key may not always match the change stream’s `documentKey`
// (because the server misreports its own sharding logic).
func ExtractTrueDocKeyFromDoc(
	fieldNames []string,
	doc bson.Raw,
) (bson.Raw, error) {
	assertFieldNameUniqueness(fieldNames)

	var dk bson.D
	for _, field := range fieldNames {

		// This is how sharding routes documents: it always
		// splits on the dot and looks deeply into the document.
		parts := strings.Split(field, ".")
		val, err := doc.LookupErr(parts...)

		switch {
		case err == nil:
			dk = append(dk, bson.E{Key: field, Value: val})
		case errors.Is(err, bsoncore.ErrElementNotFound),
			errors.As(err, &bsoncore.InvalidDepthTraversalError{}):
			// If the document lacks a value for this field
			// then don’t add it to the document key.
		default:
			return nil, errors.Wrapf(err, "extracting doc key field %#q from doc %+v", field, doc)
		}
	}

	docKey, err := bson.Marshal(dk)
	if err != nil {
		// Report the source document; docKey is nil when marshaling fails.
		return nil, errors.Wrapf(err, "marshaling doc key %v from doc %v", dk, doc)
	}

	return docKey, nil
}

// assertFieldNameUniqueness panics if any name appears more than once
// in fieldNames. Otherwise it does nothing.
func assertFieldNameUniqueness(fieldNames []string) {
	distinctCount := len(lo.Uniq(fieldNames))

	if distinctCount == len(fieldNames) {
		return
	}

	panic(fmt.Sprintf("Duplicate field names: %v", fieldNames))
}
60 changes: 60 additions & 0 deletions dockey/raw_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package dockey

import (
"slices"
"testing"

"github.com/10gen/migration-verifier/dockey/test"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.mongodb.org/mongo-driver/bson"
)

// TestExtractTrueDocKeyFromDoc runs every shared test case through
// ExtractTrueDocKeyFromDoc, once with the field names in their declared
// order and once reversed, and checks that the extracted key’s fields follow
// the order of the field names. It also checks that a duplicate field name
// causes a panic.
func TestExtractTrueDocKeyFromDoc(t *testing.T) {
	for _, reversed := range []bool{false, true} {
		names := slices.Clone(test.FieldNames)
		if reversed {
			slices.Reverse(names)
		}

		for _, tc := range test.TestCases {
			rawDoc, err := bson.Marshal(tc.Doc)
			require.NoError(t, err)

			gotRaw, err := ExtractTrueDocKeyFromDoc(names, rawDoc)
			require.NoError(t, err)

			var got bson.D
			require.NoError(t, bson.Unmarshal(gotRaw, &got))

			// The expected key lists its fields in FieldNames order, so
			// flip it whenever the field names were flipped.
			want := slices.Clone(tc.DocKey)
			if reversed {
				slices.Reverse(want)
			}

			assert.Equal(
				t,
				want,
				got,
				"doc key for %v (fieldNames: %v)",
				bson.Raw(rawDoc),
				names,
			)
		}
	}

	extractWithDuplicate := func() {
		_, _ = ExtractTrueDocKeyFromDoc(
			[]string{"foo", "bar", "foo"},
			bson.Raw{0},
		)
	}

	assert.Panics(t, extractWithDuplicate, "duplicate field name should cause panic")
}
78 changes: 78 additions & 0 deletions dockey/test/cases.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package test

import (
"github.com/10gen/migration-verifier/mslices"
"go.mongodb.org/mongo-driver/bson"
)

// TestCase pairs a document with the document key that is expected
// to be extracted from it when the key’s fields are FieldNames.
type TestCase struct {
	// Doc is the input document.
	Doc bson.D

	// DocKey is the expected document key, with its fields
	// in the same order as FieldNames.
	DocKey bson.D
}

// FieldNames are the document key’s field names that TestCases assumes.
// The dotted name exercises lookup into nested documents.
var FieldNames = mslices.Of("_id", "foo.bar.baz")

// TestCases are documents and the document keys expected from them,
// given FieldNames as the document key’s fields.
var TestCases = []TestCase{
	// The document has a value at the fully nested path foo -> bar -> baz
	// as well as at keys that literally contain dots (at several levels).
	// Only the fully nested value (1) belongs in the document key.
	{
		Doc: bson.D{
			{"_id", "abc"},
			{"foo", bson.D{
				{"bar", bson.D{{"baz", 1}}},
				{"bar.baz", 2},
			}},
			{"foo.bar", bson.D{{"baz", 3}}},
			{"foo.bar.baz", 4},
		},
		DocKey: bson.D{
			{"_id", "abc"},
			{"foo.bar.baz", int32(1)},
		},
	},
	// Same as above but without the top-level literal "foo.bar.baz" key.
	{
		Doc: bson.D{
			{"_id", "bbb"},
			{"foo", bson.D{
				{"bar", bson.D{{"baz", 1}}},
				{"bar.baz", 2},
			}},
			{"foo.bar", bson.D{{"baz", 3}}},
		},
		DocKey: bson.D{
			{"_id", "bbb"},
			{"foo.bar.baz", int32(1)},
		},
	},
	// Only literal dotted keys exist; there is no fully nested value,
	// so the document key omits foo.bar.baz.
	{
		Doc: bson.D{
			{"_id", "ccc"},
			{"foo", bson.D{
				{"bar.baz", 2},
			}},
			{"foo.bar", bson.D{{"baz", 3}}},
		},
		DocKey: bson.D{
			{"_id", "ccc"},
		},
	},
	// The nested value exists but is null: the document key
	// includes the field with a null value.
	{
		Doc: bson.D{
			{"_id", "ddd"},
			{"foo", bson.D{
				{"bar", bson.D{{"baz", nil}}},
			}},
		},
		DocKey: bson.D{
			{"_id", "ddd"},
			{"foo.bar.baz", nil},
		},
	},
	// foo is a string rather than a document, so the path cannot be
	// traversed; the document key omits foo.bar.baz.
	{
		Doc: bson.D{
			{"_id", "eee"},
			{"foo", "bar"},
		},
		DocKey: bson.D{
			{"_id", "eee"},
		},
	},
}
Loading