120 lines
		
	
	
		
			3.4 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			120 lines
		
	
	
		
			3.4 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| //  Copyright (c) 2014 Couchbase, Inc.
 | |
| //
 | |
| // Licensed under the Apache License, Version 2.0 (the "License");
 | |
| // you may not use this file except in compliance with the License.
 | |
| // You may obtain a copy of the License at
 | |
| //
 | |
| // 		http://www.apache.org/licenses/LICENSE-2.0
 | |
| //
 | |
| // Unless required by applicable law or agreed to in writing, software
 | |
| // distributed under the License is distributed on an "AS IS" BASIS,
 | |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| // See the License for the specific language governing permissions and
 | |
| // limitations under the License.
 | |
| 
 | |
| package document
 | |
| 
 | |
| import (
 | |
| 	"fmt"
 | |
| 
 | |
| 	"github.com/blevesearch/bleve/analysis"
 | |
| )
 | |
| 
 | |
| const DefaultTextIndexingOptions = IndexField | DocValues
 | |
| 
 | |
| type TextField struct {
 | |
| 	name              string
 | |
| 	arrayPositions    []uint64
 | |
| 	options           IndexingOptions
 | |
| 	analyzer          *analysis.Analyzer
 | |
| 	value             []byte
 | |
| 	numPlainTextBytes uint64
 | |
| }
 | |
| 
 | |
| func (t *TextField) Name() string {
 | |
| 	return t.name
 | |
| }
 | |
| 
 | |
| func (t *TextField) ArrayPositions() []uint64 {
 | |
| 	return t.arrayPositions
 | |
| }
 | |
| 
 | |
| func (t *TextField) Options() IndexingOptions {
 | |
| 	return t.options
 | |
| }
 | |
| 
 | |
| func (t *TextField) Analyze() (int, analysis.TokenFrequencies) {
 | |
| 	var tokens analysis.TokenStream
 | |
| 	if t.analyzer != nil {
 | |
| 		bytesToAnalyze := t.Value()
 | |
| 		if t.options.IsStored() {
 | |
| 			// need to copy
 | |
| 			bytesCopied := make([]byte, len(bytesToAnalyze))
 | |
| 			copy(bytesCopied, bytesToAnalyze)
 | |
| 			bytesToAnalyze = bytesCopied
 | |
| 		}
 | |
| 		tokens = t.analyzer.Analyze(bytesToAnalyze)
 | |
| 	} else {
 | |
| 		tokens = analysis.TokenStream{
 | |
| 			&analysis.Token{
 | |
| 				Start:    0,
 | |
| 				End:      len(t.value),
 | |
| 				Term:     t.value,
 | |
| 				Position: 1,
 | |
| 				Type:     analysis.AlphaNumeric,
 | |
| 			},
 | |
| 		}
 | |
| 	}
 | |
| 	fieldLength := len(tokens) // number of tokens in this doc field
 | |
| 	tokenFreqs := analysis.TokenFrequency(tokens, t.arrayPositions, t.options.IncludeTermVectors())
 | |
| 	return fieldLength, tokenFreqs
 | |
| }
 | |
| 
 | |
| func (t *TextField) Value() []byte {
 | |
| 	return t.value
 | |
| }
 | |
| 
 | |
| func (t *TextField) GoString() string {
 | |
| 	return fmt.Sprintf("&document.TextField{Name:%s, Options: %s, Analyzer: %v, Value: %s, ArrayPositions: %v}", t.name, t.options, t.analyzer, t.value, t.arrayPositions)
 | |
| }
 | |
| 
 | |
| func (t *TextField) NumPlainTextBytes() uint64 {
 | |
| 	return t.numPlainTextBytes
 | |
| }
 | |
| 
 | |
| func NewTextField(name string, arrayPositions []uint64, value []byte) *TextField {
 | |
| 	return NewTextFieldWithIndexingOptions(name, arrayPositions, value, DefaultTextIndexingOptions)
 | |
| }
 | |
| 
 | |
| func NewTextFieldWithIndexingOptions(name string, arrayPositions []uint64, value []byte, options IndexingOptions) *TextField {
 | |
| 	return &TextField{
 | |
| 		name:              name,
 | |
| 		arrayPositions:    arrayPositions,
 | |
| 		options:           options,
 | |
| 		value:             value,
 | |
| 		numPlainTextBytes: uint64(len(value)),
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func NewTextFieldWithAnalyzer(name string, arrayPositions []uint64, value []byte, analyzer *analysis.Analyzer) *TextField {
 | |
| 	return &TextField{
 | |
| 		name:              name,
 | |
| 		arrayPositions:    arrayPositions,
 | |
| 		options:           DefaultTextIndexingOptions,
 | |
| 		analyzer:          analyzer,
 | |
| 		value:             value,
 | |
| 		numPlainTextBytes: uint64(len(value)),
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func NewTextFieldCustom(name string, arrayPositions []uint64, value []byte, options IndexingOptions, analyzer *analysis.Analyzer) *TextField {
 | |
| 	return &TextField{
 | |
| 		name:              name,
 | |
| 		arrayPositions:    arrayPositions,
 | |
| 		options:           options,
 | |
| 		analyzer:          analyzer,
 | |
| 		value:             value,
 | |
| 		numPlainTextBytes: uint64(len(value)),
 | |
| 	}
 | |
| }
 |