Skip to content

Commit

Permalink
consistenthash: resolve hash collisions
Browse files Browse the repository at this point in the history
When generating the hashring, if there are many segments for an inserted
key, there's a nonzero chance of a collision. Previously, the last
insert won and we'd end up with an extra copy of that hash-function
value in the `keyHashes` slice. Change this so the key that sorts last
wins.
  • Loading branch information
dfinkel committed Oct 20, 2022
1 parent e298fc0 commit f3ea3b3
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 1 deletion.
7 changes: 7 additions & 0 deletions .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,13 @@ jobs:
GO111MODULE: on
run: go test -race -mod=readonly -count 2 ./...

# Run all consistenthash Fuzz tests for 30s with go 1.19
- name: Fuzz Consistent-Hash
if: ${{matrix.goversion == '1.19'}}
env:
GO111MODULE: on
run: go test -fuzz=. -fuzztime=30s ./consistenthash

# Run all the tests with the paranoid linked-list checks enabled
- name: Race Test Paranoid LinkedList
env:
Expand Down
14 changes: 13 additions & 1 deletion consistenthash/consistenthash.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,19 @@ func (m *Map) Add(keys ...string) {
m.keys[key] = struct{}{}
for i := 0; i < m.segsPerKey; i++ {
hash := m.hash([]byte(strconv.Itoa(i) + key))
m.keyHashes = append(m.keyHashes, hash)
// If there's a collision on a "replica" (segment-boundary), we only want
// the entry that sorts latest to get inserted (not the last one we saw).
//
// It doesn't matter how we reconcile collisions (the smallest would work
// just as well), we just need it to be insertion-order independent so all
// instances converge on the same hashmap.
if extKey, ok := m.hashMap[hash]; !ok {
// Only add another member for this hash-value if there shouldn't be
// one there already.
m.keyHashes = append(m.keyHashes, hash)
} else if extKey >= key {
continue
}
m.hashMap[hash] = key
}
}
Expand Down
61 changes: 61 additions & 0 deletions consistenthash/consistenthash_fuzz_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
//go:build go1.18

/*
Copyright 2022 Vimeo Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package consistenthash

import (
"math/rand"
"reflect"
"testing"
)

func FuzzHashCollision(f *testing.F) {
hashFunc := func(d []byte) uint32 {
if len(d) < 2 {
return uint32(d[0])
}
return uint32(d[0] + d[1])
}
f.Fuzz(func(t *testing.T, in1, in2, in3, in4 string, seed int64, segments uint) {
// Skip anything with too many segments
if segments < 1 || segments > 1<<20 {
t.Skip()
}
src := rand.NewSource(seed)
r := rand.New(src)
input := [...]string{in1, in2, in3, in4}
r.Shuffle(len(input), func(i, j int) { input[i], input[j] = input[j], input[i] })

hash1 := New(int(segments), hashFunc)
hash1.Add(input[:]...)

r.Shuffle(len(input), func(i, j int) { input[i], input[j] = input[j], input[i] })
hash2 := New(int(segments), hashFunc)
hash2.Add(input[:]...)

if !reflect.DeepEqual(hash1.hashMap, hash2.hashMap) {
t.Errorf("hash maps are not identical: %+v vs %+v", hash1.hashMap, hash2.hashMap)
}
if !reflect.DeepEqual(hash1.keys, hash2.keys) {
t.Errorf("hash keys are not identical: %+v vs %+v", hash1.keys, hash2.keys)
}
if !reflect.DeepEqual(hash1.keyHashes, hash2.keyHashes) {
t.Errorf("hash keys are not identical: %+v vs %+v", hash1.keys, hash2.keys)
}
})
}
20 changes: 20 additions & 0 deletions consistenthash/consistenthash_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,26 @@ func TestHashing(t *testing.T) {

}

func TestHashCollision(t *testing.T) {
hashFunc := func(d []byte) uint32 {
return uint32(d[0])
}
hash1 := New(1, hashFunc)
hash1.Add("Bill", "Bob", "Bonny")
hash2 := New(1, hashFunc)
hash2.Add("Bill", "Bonny", "Bob")

t.Log(hash1.hashMap[uint32('0')], hash2.hashMap[uint32('0')])
t.Logf("%+v", hash1.hashMap)
t.Logf("%v", hash1.keyHashes)
t.Logf("%+v", hash2.hashMap)
t.Logf("%v", hash2.keyHashes)
if hash1.hashMap[uint32('0')] != hash2.hashMap[uint32('0')] {
t.Errorf("inconsistent owner for hash %d: %s vs %s", 'B',
hash1.hashMap[uint32('B')], hash2.hashMap[uint32('B')])
}
}

func TestConsistency(t *testing.T) {
hash1 := New(1, nil)
hash2 := New(1, nil)
Expand Down

0 comments on commit f3ea3b3

Please sign in to comment.