Skip to content

Commit

Permalink
add support for numPlainTextBytesIndexed metric
Browse files Browse the repository at this point in the history
  • Loading branch information
mschoch committed Mar 5, 2016
1 parent 81780f9 commit 23a323b
Show file tree
Hide file tree
Showing 12 changed files with 193 additions and 49 deletions.
15 changes: 15 additions & 0 deletions document/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,18 @@ func (d *Document) GoString() string {
}
return fmt.Sprintf("&document.Document{ID:%s, Fields: %s, CompositeFields: %s}", d.ID, fields, compositeFields)
}

// NumPlainTextBytes returns the number of plain text bytes this document
// represents. Every entry in Fields is counted once on its own, and then
// once more for each composite field that includes it by name.
func (d *Document) NumPlainTextBytes() uint64 {
	var total uint64

	// bytes contributed directly by each field
	for _, f := range d.Fields {
		total += f.NumPlainTextBytes()
	}

	// bytes contributed again through each composite field
	for _, composite := range d.CompositeFields {
		for _, member := range d.Fields {
			if !composite.includesField(member.Name()) {
				continue
			}
			total += member.NumPlainTextBytes()
		}
	}

	return total
}
68 changes: 68 additions & 0 deletions document/document_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package document

import (
"testing"
)

// TestDocumentNumPlainTextBytes checks Document.NumPlainTextBytes against
// documents with no fields, text fields, a numeric field, and composite
// fields. Each document has a distinct ID so a failure message identifies
// exactly one table entry.
func TestDocumentNumPlainTextBytes(t *testing.T) {

	tests := []struct {
		doc *Document
		num uint64
	}{
		// no fields
		{
			doc: NewDocument("a"),
			num: 0,
		},
		// single text field: len("hello")
		{
			doc: NewDocument("b").
				AddField(NewTextField("name", nil, []byte("hello"))),
			num: 5,
		},
		// two text fields: 5 + 1
		{
			doc: NewDocument("c").
				AddField(NewTextField("name", nil, []byte("hello"))).
				AddField(NewTextField("desc", nil, []byte("x"))),
			num: 6,
		},
		// the numeric field adds 8 on top of the text fields: 5 + 1 + 8
		{
			doc: NewDocument("d").
				AddField(NewTextField("name", nil, []byte("hello"))).
				AddField(NewTextField("desc", nil, []byte("x"))).
				AddField(NewNumericField("age", nil, 1.0)),
			num: 14,
		},
		// composite field covering every field counts them all again: 14 + 14
		{
			doc: NewDocument("e").
				AddField(NewTextField("name", nil, []byte("hello"))).
				AddField(NewTextField("desc", nil, []byte("x"))).
				AddField(NewNumericField("age", nil, 1.0)).
				AddField(NewCompositeField("_all", true, nil, nil)),
			num: 28,
		},
		// composite field that leaves out "age" re-counts only the text
		// fields: 14 + 6
		{
			doc: NewDocument("f").
				AddField(NewTextField("name", nil, []byte("hello"))).
				AddField(NewTextField("desc", nil, []byte("x"))).
				AddField(NewNumericField("age", nil, 1.0)).
				AddField(NewCompositeField("_all", true, nil, []string{"age"})),
			num: 20,
		},
	}

	for _, test := range tests {
		actual := test.doc.NumPlainTextBytes()
		if actual != test.num {
			t.Errorf("expected doc '%s' to have %d plain text bytes, got %d", test.doc.ID, test.num, actual)
		}
	}
}
5 changes: 5 additions & 0 deletions document/field.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,9 @@ type Field interface {
Options() IndexingOptions
Analyze() (int, analysis.TokenFrequencies)
Value() []byte

// NumPlainTextBytes should return the number of plain text bytes
// that this field represents - this is a common metric for tracking
// the rate of indexing
NumPlainTextBytes() uint64
}
33 changes: 21 additions & 12 deletions document/field_boolean.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@ import (
const DefaultBooleanIndexingOptions = StoreField | IndexField

// BooleanField is a document field holding a boolean value. Each member is
// declared exactly once; repeating a field name in a struct type is a
// compile error.
type BooleanField struct {
	name           string
	arrayPositions []uint64
	options        IndexingOptions
	value          []byte
	// numPlainTextBytes is the count reported by NumPlainTextBytes; it is
	// filled in by the constructors.
	numPlainTextBytes uint64
}

func (b *BooleanField) Name() string {
Expand Down Expand Up @@ -66,12 +67,17 @@ func (b *BooleanField) GoString() string {
return fmt.Sprintf("&document.BooleanField{Name:%s, Options: %s, Value: %s}", b.name, b.options, b.value)
}

// NumPlainTextBytes returns the plain text byte count recorded on the field
// when it was constructed.
func (b *BooleanField) NumPlainTextBytes() uint64 {
return b.numPlainTextBytes
}

// NewBooleanFieldFromBytes builds a BooleanField around an already-encoded
// value using the default boolean indexing options. The plain text byte
// count is recorded as len(value).
func NewBooleanFieldFromBytes(name string, arrayPositions []uint64, value []byte) *BooleanField {
	return &BooleanField{
		name:           name,
		arrayPositions: arrayPositions,
		value:          value,
		// use the boolean default rather than the numeric one; both are
		// StoreField | IndexField, so behavior is unchanged
		options:           DefaultBooleanIndexingOptions,
		numPlainTextBytes: uint64(len(value)),
	}
}

Expand All @@ -80,14 +86,17 @@ func NewBooleanField(name string, arrayPositions []uint64, b bool) *BooleanField
}

// NewBooleanFieldWithIndexingOptions builds a BooleanField for b with the
// supplied indexing options. The stored value is the single byte "T" for
// true and "F" for false.
func NewBooleanFieldWithIndexingOptions(name string, arrayPositions []uint64, b bool, options IndexingOptions) *BooleanField {
	// The byte count is tracked separately from the one-byte stored value:
	// 5 for false, 4 for true.
	// NOTE(review): presumably the lengths of the literals "false" and
	// "true" in the source document - confirm.
	numPlainTextBytes := uint64(5)
	v := []byte("F")
	if b {
		numPlainTextBytes = 4
		v = []byte("T")
	}
	return &BooleanField{
		name:              name,
		arrayPositions:    arrayPositions,
		value:             v,
		options:           options,
		numPlainTextBytes: numPlainTextBytes,
	}
}
11 changes: 9 additions & 2 deletions document/field_composite.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,11 @@ func (c *CompositeField) Value() []byte {
return []byte{}
}

func (c *CompositeField) Compose(field string, length int, freq analysis.TokenFrequencies) {
// NumPlainTextBytes always returns 0 for a composite field. The bytes of the
// fields a composite includes are added by Document.NumPlainTextBytes, which
// re-counts each included field.
func (c *CompositeField) NumPlainTextBytes() uint64 {
return 0
}

func (c *CompositeField) includesField(field string) bool {
shouldInclude := c.defaultInclude
_, fieldShouldBeIncluded := c.includedFields[field]
if fieldShouldBeIncluded {
Expand All @@ -79,8 +83,11 @@ func (c *CompositeField) Compose(field string, length int, freq analysis.TokenFr
if fieldShouldBeExcluded {
shouldInclude = false
}
return shouldInclude
}

if shouldInclude {
func (c *CompositeField) Compose(field string, length int, freq analysis.TokenFrequencies) {
if c.includesField(field) {
c.totalLength += length
c.compositeFrequencies.MergeAll(field, freq)
}
Expand Down
25 changes: 17 additions & 8 deletions document/field_datetime.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@ var MinTimeRepresentable = time.Unix(0, math.MinInt64)
var MaxTimeRepresentable = time.Unix(0, math.MaxInt64)

// DateTimeField is a document field whose value is held in prefix-coded
// form. Each member is declared exactly once; repeating a field name in a
// struct type is a compile error.
type DateTimeField struct {
	name           string
	arrayPositions []uint64
	options        IndexingOptions
	value          numeric_util.PrefixCoded
	// numPlainTextBytes is the count reported by NumPlainTextBytes; it is
	// filled in by the constructors.
	numPlainTextBytes uint64
}

func (n *DateTimeField) Name() string {
Expand Down Expand Up @@ -95,12 +96,17 @@ func (n *DateTimeField) GoString() string {
return fmt.Sprintf("&document.DateField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
}

// NumPlainTextBytes returns the plain text byte count recorded on the field
// when it was constructed.
func (n *DateTimeField) NumPlainTextBytes() uint64 {
return n.numPlainTextBytes
}

// NewDateTimeFieldFromBytes builds a DateTimeField around an already-encoded
// value using the default date/time indexing options. The plain text byte
// count is recorded as len(value).
func NewDateTimeFieldFromBytes(name string, arrayPositions []uint64, value []byte) *DateTimeField {
	return &DateTimeField{
		name:              name,
		arrayPositions:    arrayPositions,
		value:             value,
		options:           DefaultDateTimeIndexingOptions,
		numPlainTextBytes: uint64(len(value)),
	}
}

Expand All @@ -117,6 +123,9 @@ func NewDateTimeFieldWithIndexingOptions(name string, arrayPositions []uint64, d
arrayPositions: arrayPositions,
value: prefixCoded,
options: options,
// not correct, just a place holder until we revisit how fields are
// represented and can fix this better
numPlainTextBytes: uint64(8),
}, nil
}
return nil, fmt.Errorf("cannot represent %s in this type", dt)
Expand Down
25 changes: 17 additions & 8 deletions document/field_numeric.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@ const DefaultNumericIndexingOptions = StoreField | IndexField
const DefaultPrecisionStep uint = 4

// NumericField is a document field whose value is held in prefix-coded
// form. Each member is declared exactly once; repeating a field name in a
// struct type is a compile error.
type NumericField struct {
	name           string
	arrayPositions []uint64
	options        IndexingOptions
	value          numeric_util.PrefixCoded
	// numPlainTextBytes is the count reported by NumPlainTextBytes; it is
	// filled in by the constructors.
	numPlainTextBytes uint64
}

func (n *NumericField) Name() string {
Expand Down Expand Up @@ -91,12 +92,17 @@ func (n *NumericField) GoString() string {
return fmt.Sprintf("&document.NumericField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
}

// NumPlainTextBytes returns the plain text byte count recorded on the field
// when it was constructed.
func (n *NumericField) NumPlainTextBytes() uint64 {
return n.numPlainTextBytes
}

// NewNumericFieldFromBytes builds a NumericField around an already-encoded
// value using the default numeric indexing options. The plain text byte
// count is recorded as len(value).
func NewNumericFieldFromBytes(name string, arrayPositions []uint64, value []byte) *NumericField {
	return &NumericField{
		name:              name,
		arrayPositions:    arrayPositions,
		value:             value,
		options:           DefaultNumericIndexingOptions,
		numPlainTextBytes: uint64(len(value)),
	}
}

Expand All @@ -112,5 +118,8 @@ func NewNumericFieldWithIndexingOptions(name string, arrayPositions []uint64, nu
arrayPositions: arrayPositions,
value: prefixCoded,
options: options,
// not correct, just a place holder until we revisit how fields are
// represented and can fix this better
numPlainTextBytes: uint64(8),
}
}
46 changes: 27 additions & 19 deletions document/field_text.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,12 @@ import (
const DefaultTextIndexingOptions = IndexField

// TextField is a document field holding text bytes and, optionally, the
// analyzer to apply to them. Each member is declared exactly once; repeating
// a field name in a struct type is a compile error.
type TextField struct {
	name           string
	arrayPositions []uint64
	options        IndexingOptions
	analyzer       *analysis.Analyzer
	value          []byte
	// numPlainTextBytes is the count reported by NumPlainTextBytes; the
	// constructors set it to len(value).
	numPlainTextBytes uint64
}

func (t *TextField) Name() string {
Expand Down Expand Up @@ -72,35 +73,42 @@ func (t *TextField) GoString() string {
return fmt.Sprintf("&document.TextField{Name:%s, Options: %s, Analyzer: %v, Value: %s, ArrayPositions: %v}", t.name, t.options, t.analyzer, t.value, t.arrayPositions)
}

// NumPlainTextBytes returns the plain text byte count recorded on the field
// when it was constructed.
func (t *TextField) NumPlainTextBytes() uint64 {
return t.numPlainTextBytes
}

// NewTextField builds a TextField with the default text indexing options by
// delegating to NewTextFieldWithIndexingOptions.
func NewTextField(name string, arrayPositions []uint64, value []byte) *TextField {
return NewTextFieldWithIndexingOptions(name, arrayPositions, value, DefaultTextIndexingOptions)
}

// NewTextFieldWithIndexingOptions builds a TextField with the supplied
// indexing options and no analyzer set. The plain text byte count is
// recorded as len(value).
func NewTextFieldWithIndexingOptions(name string, arrayPositions []uint64, value []byte, options IndexingOptions) *TextField {
	return &TextField{
		name:              name,
		arrayPositions:    arrayPositions,
		options:           options,
		value:             value,
		numPlainTextBytes: uint64(len(value)),
	}
}

// NewTextFieldWithAnalyzer builds a TextField with the supplied analyzer and
// the default text indexing options. The plain text byte count is recorded
// as len(value).
func NewTextFieldWithAnalyzer(name string, arrayPositions []uint64, value []byte, analyzer *analysis.Analyzer) *TextField {
	return &TextField{
		name:              name,
		arrayPositions:    arrayPositions,
		options:           DefaultTextIndexingOptions,
		analyzer:          analyzer,
		value:             value,
		numPlainTextBytes: uint64(len(value)),
	}
}

// NewTextFieldCustom builds a TextField with both the indexing options and
// the analyzer supplied by the caller. The plain text byte count is recorded
// as len(value).
func NewTextFieldCustom(name string, arrayPositions []uint64, value []byte, options IndexingOptions, analyzer *analysis.Analyzer) *TextField {
	return &TextField{
		name:              name,
		arrayPositions:    arrayPositions,
		options:           options,
		analyzer:          analyzer,
		value:             value,
		numPlainTextBytes: uint64(len(value)),
	}
}
5 changes: 5 additions & 0 deletions index/firestorm/firestorm.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ func (f *Firestorm) Update(doc *document.Document) (err error) {

// do analysis before acquiring write lock
analysisStart := time.Now()
numPlainTextBytes := doc.NumPlainTextBytes()
resultChan := make(chan *index.AnalysisResult)
aw := index.NewAnalysisWork(f, doc, resultChan)

Expand Down Expand Up @@ -183,6 +184,7 @@ func (f *Firestorm) Update(doc *document.Document) (err error) {
f.dictUpdater.NotifyBatch(dictionaryDeltas)

atomic.AddUint64(&f.stats.indexTime, uint64(time.Since(indexStart)))
atomic.AddUint64(&f.stats.numPlainTextBytesIndexed, numPlainTextBytes)
return
}

Expand Down Expand Up @@ -302,11 +304,13 @@ func (f *Firestorm) Batch(batch *index.Batch) (err error) {

var docsUpdated uint64
var docsDeleted uint64
var numPlainTextBytes uint64
for _, doc := range batch.IndexOps {
if doc != nil {
doc.Number = firstDocNumber // actually assign doc numbers here
firstDocNumber++
docsUpdated++
numPlainTextBytes += doc.NumPlainTextBytes()
} else {
docsDeleted++
}
Expand Down Expand Up @@ -411,6 +415,7 @@ func (f *Firestorm) Batch(batch *index.Batch) (err error) {
atomic.AddUint64(&f.stats.updates, docsUpdated)
atomic.AddUint64(&f.stats.deletes, docsDeleted)
atomic.AddUint64(&f.stats.batches, 1)
atomic.AddUint64(&f.stats.numPlainTextBytesIndexed, numPlainTextBytes)
} else {
atomic.AddUint64(&f.stats.errors, 1)
}
Expand Down
Loading

0 comments on commit 23a323b

Please sign in to comment.