Golang github.com-acgshare-bleve registry.NewCache (method) source code examples

Listed below are source code examples of the registry.NewCache method from the Golang project github.com-acgshare-bleve, illustrating how it is used.
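
Before the examples, here is a minimal sketch of the typical pattern: create a Cache with registry.NewCache, resolve a named analyzer from it, and run that analyzer over some text. The import paths below assume the github.com/acgshare/bleve fork and are illustrative only.

package main

import (
	"fmt"

	// import paths assumed for the github.com/acgshare/bleve fork
	"github.com/acgshare/bleve/analysis/analyzers/standard_analyzer"
	"github.com/acgshare/bleve/registry"
)

func main() {
	// a Cache lazily builds and memoizes named analysis components
	cache := registry.NewCache()

	analyzer, err := cache.AnalyzerNamed(standard_analyzer.Name)
	if err != nil {
		panic(err)
	}

	// analyze some text and print each token with its byte offsets
	for _, tok := range analyzer.Analyze([]byte("The quick brown fox")) {
		fmt.Printf("%s [%d-%d]\n", tok.Term, tok.Start, tok.End)
	}
}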

Author: acgshare    Project: bleve
func BenchmarkBatch(b *testing.B) {

	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(standard_analyzer.Name)
	if err != nil {
		b.Fatal(err)
	}

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(null.Name, nil, analysisQueue)
	if err != nil {
		b.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		b.Fatal(err)
	}

	batch := index.NewBatch()
	for i := 0; i < 100; i++ {
		d := document.NewDocument(strconv.Itoa(i))
		f := document.NewTextFieldWithAnalyzer("desc", nil, bleveWikiArticle1K, analyzer)
		d.AddField(f)
		batch.Update(d)
	}

	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		err = idx.Batch(batch)
		if err != nil {
			b.Fatal(err)
		}
	}
}

Author: acgshare    Project: bleve
func TestFrenchElision(t *testing.T) {
	tests := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("l'avion"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("avion"),
				},
			},
		},
	}

	cache := registry.NewCache()
	elisionFilter, err := cache.TokenFilterNamed(ElisionName)
	if err != nil {
		t.Fatal(err)
	}
	for _, test := range tests {
		actual := elisionFilter.Filter(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term)
		}
	}
}

Author: acgshare    Project: bleve
func BenchmarkAnalyze(b *testing.B) {

	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(standard_analyzer.Name)
	if err != nil {
		b.Fatal(err)
	}

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewFirestorm(null.Name, nil, analysisQueue)
	if err != nil {
		b.Fatal(err)
	}

	d := document.NewDocument("1")
	f := document.NewTextFieldWithAnalyzer("desc", nil, bleveWikiArticle1K, analyzer)
	d.AddField(f)

	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		rv := idx.Analyze(d)
		if len(rv.Rows) < 92 || len(rv.Rows) > 93 {
			b.Fatalf("expected 512-13 rows, got %d", len(rv.Rows))
		}
	}
}

Author: acgshare    Project: bleve
func TestElisionFilter(t *testing.T) {

	tests := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ar" + string(Apostrophe) + "word"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("word"),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ar" + string(RightSingleQuotationMark) + "word"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("word"),
				},
			},
		},
	}

	cache := registry.NewCache()

	articleListConfig := map[string]interface{}{
		"type":   token_map.Name,
		"tokens": []interface{}{"ar"},
	}
	_, err := cache.DefineTokenMap("articles_test", articleListConfig)
	if err != nil {
		t.Fatal(err)
	}

	elisionConfig := map[string]interface{}{
		"type":               "elision",
		"articles_token_map": "articles_test",
	}
	elisionFilter, err := cache.DefineTokenFilter("elision_test", elisionConfig)
	if err != nil {
		t.Fatal(err)
	}

	for _, test := range tests {

		actual := elisionFilter.Filter(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term)
		}
	}
}

Author: acgshare    Project: bleve
func CommonBenchmarkIndexBatch(b *testing.B, storeName string, storeConfig map[string]interface{}, destroy KVStoreDestroy, analysisWorkers, batchSize int) {

	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed("standard")
	if err != nil {
		b.Fatal(err)
	}

	b.ResetTimer()
	b.StopTimer()
	for i := 0; i < b.N; i++ {

		analysisQueue := index.NewAnalysisQueue(analysisWorkers)
		idx, err := NewUpsideDownCouch(storeName, storeConfig, analysisQueue)
		if err != nil {
			b.Fatal(err)
		}

		err = idx.Open()
		if err != nil {
			b.Fatal(err)
		}

		b.StartTimer()
		batch := index.NewBatch()
		for j := 0; j < 1000; j++ {
			if j%batchSize == 0 {
				if len(batch.IndexOps) > 0 {
					err := idx.Batch(batch)
					if err != nil {
						b.Fatal(err)
					}
				}
				batch = index.NewBatch()
			}
			indexDocument := document.NewDocument("").
				AddField(document.NewTextFieldWithAnalyzer("body", []uint64{}, []byte(benchmarkDocBodies[j%10]), analyzer))
			indexDocument.ID = strconv.Itoa(i) + "-" + strconv.Itoa(j)
			batch.Update(indexDocument)
		}
		// close last batch
		if len(batch.IndexOps) > 0 {
			err := idx.Batch(batch)
			if err != nil {
				b.Fatal(err)
			}
		}
		b.StopTimer()
		err = idx.Close()
		if err != nil {
			b.Fatal(err)
		}
		err = destroy()
		if err != nil {
			b.Fatal(err)
		}
		analysisQueue.Close()
	}
}

Author: acgshare    Project: bleve
func TestStopWordsFilterLongestMatch(t *testing.T) {

	inputTokenStream := analysis.TokenStream{
		&analysis.Token{
			Term:     []byte("softestball"),
			Start:    0,
			End:      11,
			Position: 1,
		},
	}

	expectedTokenStream := analysis.TokenStream{
		&analysis.Token{
			Term:     []byte("softestball"),
			Start:    0,
			End:      11,
			Position: 1,
		},
		&analysis.Token{
			Term:     []byte("softest"),
			Start:    0,
			End:      7,
			Position: 1,
		},
		&analysis.Token{
			Term:     []byte("ball"),
			Start:    7,
			End:      11,
			Position: 1,
		},
	}

	cache := registry.NewCache()
	dictListConfig := map[string]interface{}{
		"type":   token_map.Name,
		"tokens": []interface{}{"soft", "softest", "ball"},
	}
	_, err := cache.DefineTokenMap("dict_test", dictListConfig)
	if err != nil {
		t.Fatal(err)
	}

	dictConfig := map[string]interface{}{
		"type":               "dict_compound",
		"dict_token_map":     "dict_test",
		"only_longest_match": true,
	}
	dictFilter, err := cache.DefineTokenFilter("dict_test", dictConfig)
	if err != nil {
		t.Fatal(err)
	}

	outputTokenStream := dictFilter.Filter(inputTokenStream)
	if !reflect.DeepEqual(outputTokenStream, expectedTokenStream) {
		t.Errorf("expected %#v got %#v", expectedTokenStream, outputTokenStream)
	}
}

Author: acgshare    Project: bleve
func TestStopWordsFilter(t *testing.T) {

	inputTokenStream := analysis.TokenStream{
		&analysis.Token{
			Term: []byte("a"),
		},
		&analysis.Token{
			Term: []byte("walk"),
		},
		&analysis.Token{
			Term: []byte("in"),
		},
		&analysis.Token{
			Term: []byte("the"),
		},
		&analysis.Token{
			Term: []byte("park"),
		},
	}

	expectedTokenStream := analysis.TokenStream{
		&analysis.Token{
			Term: []byte("walk"),
		},
		&analysis.Token{
			Term: []byte("park"),
		},
	}

	cache := registry.NewCache()
	stopListConfig := map[string]interface{}{
		"type":   token_map.Name,
		"tokens": []interface{}{"a", "in", "the"},
	}
	_, err := cache.DefineTokenMap("stop_test", stopListConfig)
	if err != nil {
		t.Fatal(err)
	}

	stopConfig := map[string]interface{}{
		"type":           "stop_tokens",
		"stop_token_map": "stop_test",
	}
	stopFilter, err := cache.DefineTokenFilter("stop_test", stopConfig)
	if err != nil {
		t.Fatal(err)
	}

	outputTokenStream := stopFilter.Filter(inputTokenStream)
	if !reflect.DeepEqual(outputTokenStream, expectedTokenStream) {
		t.Errorf("expected %#v got %#v", expectedTokenStream, outputTokenStream)
	}
}

Author: acgshare    Project: bleve
func TestSoraniAnalyzer(t *testing.T) {
	tests := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		// stop word removal
		{
			input: []byte("ئەم پیاوە"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("پیاو"),
					Position: 2,
					Start:    7,
					End:      17,
				},
			},
		},
		{
			input: []byte("پیاوە"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("پیاو"),
					Position: 1,
					Start:    0,
					End:      10,
				},
			},
		},
		{
			input: []byte("پیاو"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("پیاو"),
					Position: 1,
					Start:    0,
					End:      8,
				},
			},
		},
	}

	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}
	for _, test := range tests {
		actual := analyzer.Analyze(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %v, got %v", test.output, actual)
		}
	}
}

Author: acgshare    Project: bleve
// NewIndexMapping creates a new IndexMapping that will use all the default indexing rules
func NewIndexMapping() *IndexMapping {
	return &IndexMapping{
		TypeMapping:           make(map[string]*DocumentMapping),
		DefaultMapping:        NewDocumentMapping(),
		TypeField:             defaultTypeField,
		DefaultType:           defaultType,
		DefaultAnalyzer:       defaultAnalyzer,
		DefaultDateTimeParser: defaultDateTimeParser,
		DefaultField:          defaultField,
		ByteArrayConverter:    defaultByteArrayConverter,
		CustomAnalysis:        newCustomAnalysis(),
		cache:                 registry.NewCache(),
	}
}
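
A brief usage sketch of NewIndexMapping, assuming the top-level bleve package of this fork exposes it together with bleve.New (as in upstream bleve of this vintage); the index path and document are illustrative.

package main

import (
	// import path assumed for the github.com/acgshare/bleve fork
	bleve "github.com/acgshare/bleve"
)

func main() {
	// a mapping pre-populated with the defaults above, including a fresh
	// registry cache for resolving analyzers and other components
	mapping := bleve.NewIndexMapping()

	// open a new index on disk using that mapping
	idx, err := bleve.New("example.bleve", mapping)
	if err != nil {
		panic(err)
	}
	defer idx.Close()

	// index one document; its fields are analyzed per the mapping defaults
	if err := idx.Index("doc1", map[string]interface{}{"desc": "hello world"}); err != nil {
		panic(err)
	}
}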

Author: acgshare    Project: bleve
func TestJaAnalyzer(t *testing.T) {
	tests := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		{
			input: []byte("こんにちは世界"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("こんにちは"),
					Type:     analysis.Ideographic,
					Position: 1,
					Start:    0,
					End:      15,
				},
				&analysis.Token{
					Term:     []byte("世界"),
					Type:     analysis.Ideographic,
					Position: 2,
					Start:    15,
					End:      21,
				},
			},
		},
		{
			input: []byte("カタカナ"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("カタカナ"),
					Type:     analysis.Ideographic,
					Position: 1,
					Start:    0,
					End:      12,
				},
			},
		},
	}

	cache := registry.NewCache()
	for _, test := range tests {
		analyzer, err := cache.AnalyzerNamed(AnalyzerName)
		if err != nil {
			t.Fatal(err)
		}
		actual := analyzer.Analyze(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %v, got %v", test.output, actual)
		}
	}
}

Author: acgshare    Project: bleve
func BenchmarkAnalysis(b *testing.B) {
	for i := 0; i < b.N; i++ {

		cache := registry.NewCache()
		analyzer, err := cache.AnalyzerNamed(standard_analyzer.Name)
		if err != nil {
			b.Fatal(err)
		}

		ts := analyzer.Analyze(bleveWikiArticle)
		freqs := analysis.TokenFrequency(ts, nil, true)
		if len(freqs) != 511 {
			b.Errorf("expected %d freqs, got %d", 511, len(freqs))
		}
	}
}

Author: acgshare    Project: bleve
func TestPortugueseAnalyzer(t *testing.T) {
	tests := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		// stemming
		{
			input: []byte("quilométricas"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("quilometric"),
				},
			},
		},
		{
			input: []byte("quilométricos"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("quilometric"),
				},
			},
		},
		// stop word
		{
			input:  []byte("não"),
			output: analysis.TokenStream{},
		},
	}

	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}
	for _, test := range tests {
		actual := analyzer.Analyze(test.input)
		if len(actual) != len(test.output) {
			t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
		}
		for i, tok := range actual {
			if !reflect.DeepEqual(tok.Term, test.output[i].Term) {
				t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term)
			}
		}
	}
}

Author: acgshare    Project: bleve
func CommonBenchmarkIndex(b *testing.B, storeName string, storeConfig map[string]interface{}, destroy KVStoreDestroy, analysisWorkers int) {

	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed("standard")
	if err != nil {
		b.Fatal(err)
	}

	indexDocument := document.NewDocument("").
		AddField(document.NewTextFieldWithAnalyzer("body", []uint64{}, []byte(benchmarkDocBodies[0]), analyzer))

	b.ResetTimer()
	b.StopTimer()
	for i := 0; i < b.N; i++ {
		analysisQueue := index.NewAnalysisQueue(analysisWorkers)
		idx, err := NewUpsideDownCouch(storeName, storeConfig, analysisQueue)
		if err != nil {
			b.Fatal(err)
		}

		err = idx.Open()
		if err != nil {
			b.Fatal(err)
		}
		indexDocument.ID = strconv.Itoa(i)
		// just time the indexing portion
		b.StartTimer()
		err = idx.Update(indexDocument)
		if err != nil {
			b.Fatal(err)
		}
		b.StopTimer()
		err = idx.Close()
		if err != nil {
			b.Fatal(err)
		}
		err = destroy()
		if err != nil {
			b.Fatal(err)
		}
		analysisQueue.Close()
	}
}

Author: acgshare    Project: bleve
func TestItalianLightStemmer(t *testing.T) {
	tests := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ragazzo"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ragazz"),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ragazzi"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ragazz"),
				},
			},
		},
	}

	cache := registry.NewCache()
	filter, err := cache.TokenFilterNamed(LightStemmerName)
	if err != nil {
		t.Fatal(err)
	}
	for _, test := range tests {
		actual := filter.Filter(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term)
		}
	}
}

Author: acgshare    Project: bleve
func TestHindiAnalyzer(t *testing.T) {
	tests := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		// two ways to write 'hindi' itself
		{
			input: []byte("हिन्दी"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("हिंद"),
					Position: 1,
					Start:    0,
					End:      18,
				},
			},
		},
		{
			input: []byte("हिंदी"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("हिंद"),
					Position: 1,
					Start:    0,
					End:      15,
				},
			},
		},
	}

	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}
	for _, test := range tests {
		actual := analyzer.Analyze(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %v, got %v", test.output, actual)
		}
	}
}

Author: acgshare    Project: bleve
//......... part of the code is omitted here .........
					Term:     []byte("رُوبرت"),
					Type:     analysis.AlphaNumeric,
					Position: 2,
					Start:    4,
					End:      16,
				},
				&analysis.Token{
					Term:     []byte("موير"),
					Type:     analysis.AlphaNumeric,
					Position: 3,
					Start:    17,
					End:      25,
				},
			},
		},
		{
			input: []byte("?艱鍟䇹愯瀛"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("?艱"),
					Type:     analysis.Double,
					Position: 1,
					Start:    0,
					End:      7,
				},
				&analysis.Token{
					Term:     []byte("艱鍟"),
					Type:     analysis.Double,
					Position: 2,
					Start:    4,
					End:      10,
				},
				&analysis.Token{
					Term:     []byte("鍟䇹"),
					Type:     analysis.Double,
					Position: 3,
					Start:    7,
					End:      13,
				},
				&analysis.Token{
					Term:     []byte("䇹愯"),
					Type:     analysis.Double,
					Position: 4,
					Start:    10,
					End:      16,
				},
				&analysis.Token{
					Term:     []byte("愯瀛"),
					Type:     analysis.Double,
					Position: 5,
					Start:    13,
					End:      19,
				},
			},
		},
		{
			input: []byte("一"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("一"),
					Type:     analysis.Single,
					Position: 1,
					Start:    0,
					End:      3,
				},
			},
		},
		{
			input: []byte("一丁丂"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("一丁"),
					Type:     analysis.Double,
					Position: 1,
					Start:    0,
					End:      6,
				},
				&analysis.Token{
					Term:     []byte("丁丂"),
					Type:     analysis.Double,
					Position: 2,
					Start:    3,
					End:      9,
				},
			},
		},
	}

	cache := registry.NewCache()
	for _, test := range tests {
		analyzer, err := cache.AnalyzerNamed(AnalyzerName)
		if err != nil {
			t.Fatal(err)
		}
		actual := analyzer.Analyze(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %v, got %v", test.output, actual)
		}
	}
}

Author: acgshare    Project: bleve
func TestPersianAnalyzerOthers(t *testing.T) {
	tests := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		// nouns
		{
			input: []byte("برگ ها"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("برگ"),
				},
			},
		},
		{
			input: []byte("برگ‌ها"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("برگ"),
				},
			},
		},
		// non persian
		{
			input: []byte("English test."),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("english"),
				},
				&analysis.Token{
					Term: []byte("test"),
				},
			},
		},
		// others
		{
			input: []byte("خورده مي شده بوده باشد"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("خورده"),
				},
			},
		},
		{
			input: []byte("برگ‌ها"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("برگ"),
				},
			},
		},
	}

	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}
	for _, test := range tests {
		actual := analyzer.Analyze(test.input)
		if len(actual) != len(test.output) {
			t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
		}
		for i, tok := range actual {
			if !reflect.DeepEqual(tok.Term, test.output[i].Term) {
				t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term)
			}
		}
	}
}

Author: acgshare    Project: bleve
//......... part of the code is omitted here .........
		// passive future indicative
		{
			input: []byte("خورده خواهد شد"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("خورده"),
				},
			},
		},
		// passive present progressive indicative
		{
			input: []byte("دارد خورده مي شود"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("خورده"),
				},
			},
		},
		// passive preterite progressive indicative
		{
			input: []byte("داشت خورده مي شد"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("خورده"),
				},
			},
		},
		// passive present subjunctive
		{
			input: []byte("خورده شود"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("خورده"),
				},
			},
		},
		// passive preterite subjunctive
		{
			input: []byte("خورده شده باشد"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("خورده"),
				},
			},
		},
		// passive imperfective preterite subjunctive
		{
			input: []byte("خورده مي شده باشد"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("خورده"),
				},
			},
		},
		// passive pluperfect subjunctive
		{
			input: []byte("خورده شده بوده باشد"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("خورده"),
				},
			},
		},
		// passive imperfective pluperfect subjunctive
		{
			input: []byte("خورده مي شده بوده باشد"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("خورده"),
				},
			},
		},
		// active present subjunctive
		{
			input: []byte("بخورد"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("بخورد"),
				},
			},
		},
	}

	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}
	for _, test := range tests {
		actual := analyzer.Analyze(test.input)
		if len(actual) != len(test.output) {
			t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
		}
		for i, tok := range actual {
			if !reflect.DeepEqual(tok.Term, test.output[i].Term) {
				t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term)
			}
		}
	}
}

Author: acgshare    Project: bleve
//......... part of the code is omitted here .........
					`[fF][iI][lL][eE]://(\S)*`,
					`[fF][tT][pP]://(\S)*`,
				},
			},
			result: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("what"),
					Position: 1,
					Start:    0,
					End:      4,
				},
				&analysis.Token{
					Term:     []byte("ftp://blevesearch.com/"),
					Position: 2,
					Start:    5,
					End:      27,
				},
				&analysis.Token{
					Term:     []byte("songs"),
					Position: 3,
					Start:    28,
					End:      33,
				},
			},
		},
		{
			input: []byte("please email [email protected] the URL https://blevesearch.com/"),
			config: map[string]interface{}{
				"type":      "exception",
				"tokenizer": "unicode",
				"exceptions": []interface{}{
					`[hH][tT][tT][pP][sS]?://(\S)*`,
					`[fF][iI][lL][eE]://(\S)*`,
					`[fF][tT][pP]://(\S)*`,
					`\[email protected]\S+`,
				},
			},
			result: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("please"),
					Position: 1,
					Start:    0,
					End:      6,
				},
				&analysis.Token{
					Term:     []byte("email"),
					Position: 2,
					Start:    7,
					End:      12,
				},
				&analysis.Token{
					Term:     []byte("[email protected]"),
					Position: 3,
					Start:    13,
					End:      32,
				},
				&analysis.Token{
					Term:     []byte("the"),
					Position: 4,
					Start:    33,
					End:      36,
				},
				&analysis.Token{
					Term:     []byte("URL"),
					Position: 5,
					Start:    37,
					End:      40,
				},
				&analysis.Token{
					Term:     []byte("https://blevesearch.com/"),
					Position: 6,
					Start:    41,
					End:      65,
				},
			},
		},
	}

	// remaining := unicode.NewUnicodeTokenizer()
	for _, test := range tests {

		// build the requested exception tokenizer
		cache := registry.NewCache()
		tokenizer, err := cache.DefineTokenizer("custom", test.config)
		if err != nil {
			t.Fatal(err)
		}

		// pattern := strings.Join(test.patterns, "|")
		// r, err := regexp.Compile(pattern)
		// if err != nil {
		// 	t.Fatal(err)
		// }
		// tokenizer := NewExceptionsTokenizer(r, remaining)
		actual := tokenizer.Tokenize(test.input)
		if !reflect.DeepEqual(actual, test.result) {
			t.Errorf("expected %v, got %v", test.result, actual)
		}
	}
}

Author: acgshare    Project: bleve
// UnmarshalJSON deserializes a JSON representation of the IndexMapping
func (im *IndexMapping) UnmarshalJSON(data []byte) error {
	var tmp struct {
		TypeMapping           map[string]*DocumentMapping `json:"types"`
		DefaultMapping        *DocumentMapping            `json:"default_mapping"`
		TypeField             string                      `json:"type_field"`
		DefaultType           string                      `json:"default_type"`
		DefaultAnalyzer       string                      `json:"default_analyzer"`
		DefaultDateTimeParser string                      `json:"default_datetime_parser"`
		DefaultField          string                      `json:"default_field"`
		ByteArrayConverter    string                      `json:"byte_array_converter"`
		CustomAnalysis        *customAnalysis             `json:"analysis"`
	}
	err := json.Unmarshal(data, &tmp)
	if err != nil {
		return err
	}

	im.cache = registry.NewCache()

	im.CustomAnalysis = newCustomAnalysis()
	if tmp.CustomAnalysis != nil {
		if tmp.CustomAnalysis.CharFilters != nil {
			im.CustomAnalysis.CharFilters = tmp.CustomAnalysis.CharFilters
		}
		if tmp.CustomAnalysis.Tokenizers != nil {
			im.CustomAnalysis.Tokenizers = tmp.CustomAnalysis.Tokenizers
		}
		if tmp.CustomAnalysis.TokenMaps != nil {
			im.CustomAnalysis.TokenMaps = tmp.CustomAnalysis.TokenMaps
		}
		if tmp.CustomAnalysis.TokenFilters != nil {
			im.CustomAnalysis.TokenFilters = tmp.CustomAnalysis.TokenFilters
		}
		if tmp.CustomAnalysis.Analyzers != nil {
			im.CustomAnalysis.Analyzers = tmp.CustomAnalysis.Analyzers
		}
		if tmp.CustomAnalysis.DateTimeParsers != nil {
			im.CustomAnalysis.DateTimeParsers = tmp.CustomAnalysis.DateTimeParsers
		}
	}

	im.TypeField = defaultTypeField
	if tmp.TypeField != "" {
		im.TypeField = tmp.TypeField
	}

	im.DefaultType = defaultType
	if tmp.DefaultType != "" {
		im.DefaultType = tmp.DefaultType
	}

	im.DefaultAnalyzer = defaultAnalyzer
	if tmp.DefaultAnalyzer != "" {
		im.DefaultAnalyzer = tmp.DefaultAnalyzer
	}

	im.DefaultDateTimeParser = defaultDateTimeParser
	if tmp.DefaultDateTimeParser != "" {
		im.DefaultDateTimeParser = tmp.DefaultDateTimeParser
	}

	im.DefaultField = defaultField
	if tmp.DefaultField != "" {
		im.DefaultField = tmp.DefaultField
	}

	im.ByteArrayConverter = defaultByteArrayConverter
	if tmp.ByteArrayConverter != "" {
		im.ByteArrayConverter = tmp.ByteArrayConverter
	}

	im.DefaultMapping = NewDocumentMapping()
	if tmp.DefaultMapping != nil {
		im.DefaultMapping = tmp.DefaultMapping
	}

	im.TypeMapping = make(map[string]*DocumentMapping, len(tmp.TypeMapping))
	for typeName, typeDocMapping := range tmp.TypeMapping {
		im.TypeMapping[typeName] = typeDocMapping
	}

	err = im.CustomAnalysis.registerAll(im)
	if err != nil {
		return err
	}

	return nil
}
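
A minimal sketch of how the UnmarshalJSON method above is exercised, assuming the mapping type and NewIndexMapping are exposed from the fork's top-level bleve package; the JSON is illustrative and only sets two of the optional fields.

package main

import (
	"encoding/json"
	"fmt"

	// import path assumed for the github.com/acgshare/bleve fork
	bleve "github.com/acgshare/bleve"
)

func main() {
	// a stored mapping; field names match the json tags used above
	data := []byte(`{"default_analyzer": "standard", "default_field": "_all"}`)

	// json.Unmarshal calls IndexMapping.UnmarshalJSON, which resets the
	// registry cache and re-registers any custom analysis components
	im := bleve.NewIndexMapping()
	if err := json.Unmarshal(data, im); err != nil {
		panic(err)
	}
	fmt.Println(im.DefaultAnalyzer, im.DefaultField)
}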

