Skip to content

Commit

Permalink
Extended docs regarding ValueAtQuantile() and added ValuesAreEquivale…
Browse files Browse the repository at this point in the history
…nt() (#39)

* [add] Extended docs regarding ValueAtQuantile() and added ValuesAreEquivalent()

* [add] made New() documentation clearer

* [fix] Fixes per PR review on New()

* [fix] Fixed New() not to panic on numberOfSignificantValueDigits < 1 || numberOfSignificantValueDigits > 5. Adding linter check to CI

* [add] Added whitebox testing for hdr.go ( specifically for New() numberOfSignificantValueDigits limits ).
  • Loading branch information
filipecosta90 authored Nov 24, 2020
1 parent 6663c35 commit 1dc8842
Show file tree
Hide file tree
Showing 8 changed files with 130 additions and 590 deletions.
13 changes: 12 additions & 1 deletion .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,15 @@ jobs:
- name: Checkout code
uses: actions/checkout@v2
- name: Test
run: make test
run: make test
lint:
runs-on: ubuntu-latest
steps:
- name: Install Go
uses: actions/setup-go@v2
with:
go-version: 1.15.x
- name: Checkout code
uses: actions/checkout@v2
- name: Lint
run: make lint
2 changes: 1 addition & 1 deletion example_hdr_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
)

// This latency Histogram could be used to track and analyze the counts of
// observed integer values between 0 us and 30000000 us ( 30 secs )
// observed integer values between 1 us and 30000000 us ( 30 secs )
// while maintaining a value precision of 4 significant digits across that range,
// translating to a value resolution of :
// - 1 microsecond up to 10 milliseconds,
Expand Down
6 changes: 5 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@ module github.com/HdrHistogram/hdrhistogram-go
go 1.14

require (
github.com/golangci/golangci-lint v1.31.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/google/go-cmp v0.5.2
github.com/kr/text v0.2.0 // indirect
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect
github.com/stretchr/testify v1.6.1
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f // indirect
)
564 changes: 3 additions & 561 deletions go.sum

Large diffs are not rendered by default.

102 changes: 78 additions & 24 deletions hdr.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ type Snapshot struct {
// non-normally distributed data (like latency) with a high degree of accuracy
// and a bounded degree of precision.
type Histogram struct {
lowestTrackableValue int64
lowestDiscernibleValue int64
highestTrackableValue int64
unitMagnitude int64
significantFigures int64
Expand Down Expand Up @@ -69,23 +69,41 @@ func (h *Histogram) SetStartTimeMs(startTimeMs int64) {
h.startTimeMs = startTimeMs
}

// New returns a new Histogram instance capable of tracking values in the given
// range and with the given amount of precision.
func New(minValue, maxValue int64, sigfigs int) *Histogram {
if sigfigs < 1 || 5 < sigfigs {
panic(fmt.Errorf("sigfigs must be [1,5] (was %d)", sigfigs))
}

largestValueWithSingleUnitResolution := 2 * math.Pow10(sigfigs)
// New constructs a Histogram given the lowest and highest values to be tracked and a number of significant decimal digits.
//
// Providing a lowestDiscernibleValue is useful in situations where the units used for the histogram's values are
// much smaller than the minimal accuracy required.
// E.g. when tracking time values stated in nanosecond units, where the minimal accuracy required is a microsecond,
// the proper value for lowestDiscernibleValue would be 1000.
//
// Note: the numberOfSignificantValueDigits must be [1,5]. If lower than 1 the numberOfSignificantValueDigits will be
// forced to 1, and if higher than 5 the numberOfSignificantValueDigits will be forced to 5.
func New(lowestDiscernibleValue, highestTrackableValue int64, numberOfSignificantValueDigits int) *Histogram {
if numberOfSignificantValueDigits < 1 {
numberOfSignificantValueDigits = 1
} else if numberOfSignificantValueDigits > 5 {
numberOfSignificantValueDigits = 5
}
if lowestDiscernibleValue < 1 {
lowestDiscernibleValue = 1
}

// Given a 3 decimal point accuracy, the expectation is obviously for "+/- 1 unit at 1000". It also means that
// it's "ok to be +/- 2 units at 2000". The "tricky" thing is that it is NOT ok to be +/- 2 units at 1999. Only
// starting at 2000. So internally, we need to maintain single unit resolution to 2x 10^decimalPoints.
largestValueWithSingleUnitResolution := 2 * math.Pow10(numberOfSignificantValueDigits)

// We need to maintain power-of-two subBucketCount (for clean direct indexing) that is large enough to
// provide unit resolution to at least largestValueWithSingleUnitResolution. So figure out
// largestValueWithSingleUnitResolution's nearest power-of-two (rounded up), and use that:
subBucketCountMagnitude := int32(math.Ceil(math.Log2(float64(largestValueWithSingleUnitResolution))))

subBucketHalfCountMagnitude := subBucketCountMagnitude
if subBucketHalfCountMagnitude < 1 {
subBucketHalfCountMagnitude = 1
}
subBucketHalfCountMagnitude--

unitMagnitude := int32(math.Floor(math.Log2(float64(minValue))))
unitMagnitude := int32(math.Floor(math.Log2(float64(lowestDiscernibleValue))))
if unitMagnitude < 0 {
unitMagnitude = 0
}
Expand All @@ -98,20 +116,16 @@ func New(minValue, maxValue int64, sigfigs int) *Histogram {
// determine exponent range needed to support the trackable value with no
// overflow:
smallestUntrackableValue := int64(subBucketCount) << uint(unitMagnitude)
bucketsNeeded := int32(1)
for smallestUntrackableValue < maxValue {
smallestUntrackableValue <<= 1
bucketsNeeded++
}
bucketsNeeded := getBucketsNeededToCoverValue(smallestUntrackableValue, highestTrackableValue)

bucketCount := bucketsNeeded
countsLen := (bucketCount + 1) * (subBucketCount / 2)

return &Histogram{
lowestTrackableValue: minValue,
highestTrackableValue: maxValue,
lowestDiscernibleValue: lowestDiscernibleValue,
highestTrackableValue: highestTrackableValue,
unitMagnitude: int64(unitMagnitude),
significantFigures: int64(sigfigs),
significantFigures: int64(numberOfSignificantValueDigits),
subBucketHalfCountMagnitude: subBucketHalfCountMagnitude,
subBucketHalfCount: subBucketHalfCount,
subBucketMask: subBucketMask,
Expand All @@ -126,6 +140,21 @@ func New(minValue, maxValue int64, sigfigs int) *Histogram {
}
}

// getBucketsNeededToCoverValue returns how many buckets are required so that
// maxValue is covered, given that a single bucket covers every value below
// smallestUntrackableValue and each additional bucket doubles that bound.
//
// The result is always at least 1. A non-positive smallestUntrackableValue is
// treated as needing a single bucket instead of looping forever.
func getBucketsNeededToCoverValue(smallestUntrackableValue int64, maxValue int64) int32 {
	// always have at least 1 bucket
	bucketsNeeded := int32(1)
	if smallestUntrackableValue <= 0 {
		// Doubling can never lift a non-positive bound above maxValue: zero
		// stays zero and a negative value eventually shifts down to zero, so
		// the loop below would never terminate. Such a bound can reach this
		// function when the caller's left shift wraps around.
		return bucketsNeeded
	}
	for smallestUntrackableValue < maxValue {
		if smallestUntrackableValue > (math.MaxInt64 / 2) {
			// next shift will overflow, meaning that bucket could represent values up to ones greater than
			// math.MaxInt64, so it's the last bucket
			return bucketsNeeded + 1
		}
		smallestUntrackableValue <<= 1
		bucketsNeeded++
	}
	return bucketsNeeded
}

// ByteSize returns an estimate of the amount of memory allocated to the
// histogram in bytes.
//
Expand Down Expand Up @@ -277,7 +306,12 @@ func (h *Histogram) setCountAtIndex(idx int, n int64) {
h.totalCount += n
}

// ValueAtQuantile returns the recorded value at the given quantile (0..100).
// ValueAtQuantile returns the largest value that (100% - percentile) of the overall recorded value entries
// in the histogram are either larger than or equivalent to.
//
// Note that two values are "equivalent" if `ValuesAreEquivalent(value1,value2)` would return true.
//
// Returns 0 if no recorded values exist.
func (h *Histogram) ValueAtQuantile(q float64) int64 {
if q > 100 {
q = 100
Expand All @@ -290,13 +324,24 @@ func (h *Histogram) ValueAtQuantile(q float64) int64 {
for i.next() {
total += i.countAtIdx
if total >= countAtPercentile {
if q == 0.0 {
return h.lowestEquivalentValue(i.valueFromIdx)
}
return h.highestEquivalentValue(i.valueFromIdx)
}
}

return 0
}

// ValuesAreEquivalent reports whether value1 and value2 are equivalent at the
// histogram's resolution, i.e. whether both map to the same lowest equivalent
// value. Samples recorded for two equivalent values are counted in a common
// total count.
func (h *Histogram) ValuesAreEquivalent(value1, value2 int64) (result bool) {
	lowest1 := h.lowestEquivalentValue(value1)
	lowest2 := h.lowestEquivalentValue(value2)
	return lowest1 == lowest2
}

// CumulativeDistribution returns an ordered list of brackets of the
// distribution of recorded values.
func (h *Histogram) CumulativeDistribution() []Bracket {
Expand All @@ -323,7 +368,7 @@ func (h *Histogram) SignificantFigures() int64 {
// LowestTrackableValue returns the lower bound on values that will be added
// to the histogram
func (h *Histogram) LowestTrackableValue() int64 {
return h.lowestTrackableValue
return h.lowestDiscernibleValue
}

// HighestTrackableValue returns the upper bound on values that will be added
Expand Down Expand Up @@ -361,7 +406,7 @@ func (h *Histogram) Distribution() (result []Bar) {
func (h *Histogram) Equals(other *Histogram) bool {
switch {
case
h.lowestTrackableValue != other.lowestTrackableValue,
h.lowestDiscernibleValue != other.lowestDiscernibleValue,
h.highestTrackableValue != other.highestTrackableValue,
h.unitMagnitude != other.unitMagnitude,
h.significantFigures != other.significantFigures,
Expand All @@ -387,7 +432,7 @@ func (h *Histogram) Equals(other *Histogram) bool {
// Import to construct a new Histogram with the same state.
func (h *Histogram) Export() *Snapshot {
return &Snapshot{
LowestTrackableValue: h.lowestTrackableValue,
LowestTrackableValue: h.lowestDiscernibleValue,
HighestTrackableValue: h.highestTrackableValue,
SignificantFigures: h.significantFigures,
Counts: append([]int64(nil), h.counts...), // copy
Expand Down Expand Up @@ -478,12 +523,21 @@ func (h *Histogram) countsIndex(bucketIdx, subBucketIdx int32) int32 {
return bucketBaseIdx + offsetInBucket
}

// return the lowest (and therefore highest precision) bucket index that can represent the value
// getBucketIndex returns the index of the bucket that v falls into.
//
// The index is the number of powers of two by which v exceeds the biggest
// value that fits in bucket 0; each successive bucket can hold a value twice
// as large as the previous one.
func (h *Histogram) getBucketIndex(v int64) int32 {
	// OR-ing in subBucketMask means every value at or below the mask yields
	// the same bitLen result (assuming bitLen counts significant bits — TODO confirm).
	pow2Ceiling := bitLen(v | h.subBucketMask)
	// Magnitude already accounted for by bucket 0: the unit magnitude plus the
	// full sub-bucket count magnitude (half-count magnitude + 1).
	bucketZeroMagnitude := int64(h.unitMagnitude) + int64(h.subBucketHalfCountMagnitude+1)
	return int32(pow2Ceiling - bucketZeroMagnitude)
}

// getSubBucketIdx returns the position of v inside the bucket with index idx,
// obtained by shifting v right by idx plus the unit magnitude.
//
// For bucket 0 the result may be anywhere in 0 to subBucketCount. For any
// other bucket it always lands in the top half of subBucketCount: if bucket
// k > 0 produced a bottom-half result, then, because of how buckets overlap,
// the value would also have been in the top half of bucket k-1, and
// getBucketIndex() would have returned k-1 instead. Shifting by one bit fewer
// would then make the result twice as big, i.e. in the top half.
func (h *Histogram) getSubBucketIdx(v int64, idx int32) int32 {
	shift := uint(int64(idx) + int64(h.unitMagnitude))
	return int32(v >> shift)
}
Expand All @@ -505,11 +559,11 @@ type iterator struct {
highestEquivalentValue int64
}

// Returns the next element in the iteration.
func (i *iterator) next() bool {
if i.countToIdx >= i.h.totalCount {
return false
}

// increment bucket
i.subBucketIdx++
if i.subBucketIdx >= i.h.subBucketCount {
Expand Down
2 changes: 1 addition & 1 deletion hdr_encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ func (h *Histogram) encodeIntoByteBuffer() (*bytes.Buffer, error) {
if err != nil {
return nil, err
}
err = binary.Write(toCompress, binary.BigEndian, h.lowestTrackableValue) // 16-23
err = binary.Write(toCompress, binary.BigEndian, h.lowestDiscernibleValue) // 16-23
if err != nil {
return nil, err
}
Expand Down
16 changes: 15 additions & 1 deletion hdr_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ func TestValueAtQuantile(t *testing.T) {
}
}


func TestMean(t *testing.T) {
h := hdrhistogram.New(1, 10000000, 3)
for i := 0; i < 1000000; i++ {
Expand Down Expand Up @@ -386,3 +385,18 @@ func TestEquals(t *testing.T) {
t.Error("Expected Histograms to be equivalent")
}
}

// nolint
func TestHistogram_ValuesAreEquivalent(t *testing.T) {
	// Two values compared on a histogram with a narrow tracked range.
	narrow := hdrhistogram.New(1476573605, 1476593605, 3)
	assert.True(t, narrow.ValuesAreEquivalent(1476583605, 2147483647))

	// test large histograms
	large := hdrhistogram.New(20000000, 100000000, 5)
	for _, v := range []int64{100000000, 20000000, 30000000} {
		large.RecordValue(v)
	}

	// Each quantile must resolve to a value equivalent to the expected one.
	checks := []struct {
		expected int64
		quantile float64
	}{
		{expected: 20000000, quantile: 50.0},
		{expected: 100000000, quantile: 83.34},
		{expected: 100000000, quantile: 99.0},
	}
	for _, c := range checks {
		assert.True(t, large.ValuesAreEquivalent(c.expected, large.ValueAtQuantile(c.quantile)))
	}
}
15 changes: 15 additions & 0 deletions hdr_whitebox_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package hdrhistogram

import (
"github.com/stretchr/testify/assert"
"testing"
)

// TestHistogram_New_internals checks that New forces an out-of-range
// numberOfSignificantValueDigits into [1,5] and stores the forced value in
// significantFigures.
func TestHistogram_New_internals(t *testing.T) {
	cases := []struct {
		requested int
		stored    int64
	}{
		// higher than 5: numberOfSignificantValueDigits is forced to 5
		{requested: 6, stored: 5},
		// lower than 1: numberOfSignificantValueDigits is forced to 1
		{requested: 0, stored: 1},
	}
	for _, c := range cases {
		hist := New(1, 9007199254740991, c.requested)
		assert.Equal(t, c.stored, hist.significantFigures)
	}
}

0 comments on commit 1dc8842

Please sign in to comment.