作者:acgshar
项目:blev
// DictionaryCompoundFilterConstructor builds a dictionary compound token
// filter from config.
//
// Recognised keys:
//   - "min_word_size", "min_subword_size", "max_subword_size": honoured only
//     when the value is a float64, which is converted with int(); otherwise
//     the corresponding package default is kept.
//   - "only_longest_match": honoured only when the value is a bool.
//   - "dict_token_map": required string naming a token map in cache.
//
// An error is returned when "dict_token_map" is absent or not a string, or
// when the named token map cannot be obtained from cache.
func DictionaryCompoundFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	wordMin := defaultMinWordSize
	if v, ok := config["min_word_size"].(float64); ok {
		wordMin = int(v)
	}

	subMin := defaultMinSubWordSize
	if v, ok := config["min_subword_size"].(float64); ok {
		subMin = int(v)
	}

	subMax := defaultMaxSubWordSize
	if v, ok := config["max_subword_size"].(float64); ok {
		subMax = int(v)
	}

	longestOnly := defaultOnlyLongestMatch
	if v, ok := config["only_longest_match"].(bool); ok {
		longestOnly = v
	}

	mapName, ok := config["dict_token_map"].(string)
	if !ok {
		return nil, fmt.Errorf("must specify dict_token_map")
	}
	dict, err := cache.TokenMapNamed(mapName)
	if err != nil {
		return nil, fmt.Errorf("error building dict compound words filter: %v", err)
	}

	return NewDictionaryCompoundFilter(dict, wordMin, subMin, subMax, longestOnly), nil
}
作者:acgshar
项目:blev
// AnalyzerConstructor assembles an analyzer that uses the tokenizer
// registered under unicode.Name followed by the token filters registered
// under PossessiveName, lower_case_filter.Name, StopName and porter.Name, in
// that order. config is not consulted.
//
// The first lookup failure reported by cache is returned unchanged.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tok, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// Lookup order and pipeline order are the same, so a single table
	// drives both.
	filterNames := []string{
		PossessiveName,
		lower_case_filter.Name,
		StopName,
		porter.Name,
	}
	filters := make([]analysis.TokenFilter, 0, len(filterNames))
	for _, name := range filterNames {
		filter, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		filters = append(filters, filter)
	}

	return &analysis.Analyzer{
		Tokenizer:    tok,
		TokenFilters: filters,
	}, nil
}
作者:acgshar
项目:blev
// ElisionFilterConstructor returns an elision filter backed by the token map
// registered in cache under ArticlesName. config is not consulted.
//
// A failed token map lookup is reported as an "error building elision
// filter" error.
func ElisionFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	articles, lookupErr := cache.TokenMapNamed(ArticlesName)
	if lookupErr != nil {
		return nil, fmt.Errorf("error building elision filter: %v", lookupErr)
	}

	filter := elision_filter.NewElisionFilter(articles)
	return filter, nil
}
作者:acgshar
项目:blev
// AnalyzerConstructor assembles an analyzer that uses the tokenizer
// registered under unicode.Name and four token filters from cache. config is
// not consulted.
//
// The filters are looked up in the order ElisionName,
// lower_case_filter.Name, StopName, LightStemmerName, but the resulting
// pipeline runs the lowercase filter first, then elision, stop and stemmer.
// The first lookup failure reported by cache is returned unchanged.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tok, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	elision, err := cache.TokenFilterNamed(ElisionName)
	if err != nil {
		return nil, err
	}

	lower, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}

	stop, err := cache.TokenFilterNamed(StopName)
	if err != nil {
		return nil, err
	}

	stemmer, err := cache.TokenFilterNamed(LightStemmerName)
	if err != nil {
		return nil, err
	}

	// Note the pipeline order: lowercase precedes elision even though
	// elision was fetched first.
	pipeline := []analysis.TokenFilter{lower, elision, stop, stemmer}
	return &analysis.Analyzer{
		Tokenizer:    tok,
		TokenFilters: pipeline,
	}, nil
}
作者:acgshar
项目:blev
// AnalyzerConstructor assembles an analyzer that uses the tokenizer
// registered under unicode.Name followed by the token filters registered
// under NormalizeName, lower_case_filter.Name, StopName and StemmerName, in
// that order. config is not consulted.
//
// The first lookup failure reported by cache is returned unchanged.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tok, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// Lookup order and pipeline order are the same, so a single table
	// drives both.
	filterNames := []string{
		NormalizeName,
		lower_case_filter.Name,
		StopName,
		StemmerName,
	}
	filters := make([]analysis.TokenFilter, 0, len(filterNames))
	for _, name := range filterNames {
		filter, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		filters = append(filters, filter)
	}

	return &analysis.Analyzer{
		Tokenizer:    tok,
		TokenFilters: filters,
	}, nil
}
作者:acgshar
项目:blev
// AnalyzerConstructor assembles an analyzer that uses the tokenizer
// registered under unicode.Name and the following token filters, in order:
// the filter registered under lower_case_filter.Name, a unicode
// normalization filter created for unicode_normalize.NFKC, and the filters
// registered under StopName, NormalizeName and StemmerName. config is not
// consulted.
//
// The first lookup failure reported by cache is returned unchanged.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tok, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	filters := make([]analysis.TokenFilter, 0, 5)

	lower, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	// The NFKC filter is built directly rather than fetched from the cache;
	// it is created right after the lowercase lookup succeeds.
	nfkc := unicode_normalize.MustNewUnicodeNormalizeFilter(unicode_normalize.NFKC)
	filters = append(filters, lower, nfkc)

	for _, name := range []string{StopName, NormalizeName, StemmerName} {
		filter, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		filters = append(filters, filter)
	}

	return &analysis.Analyzer{
		Tokenizer:    tok,
		TokenFilters: filters,
	}, nil
}
作者:acgshar
项目:blev
// TokenizerConstructor returns an exceptions tokenizer built from the
// package-level exceptionsRegexp and the tokenizer registered in cache under
// unicode.Name, which is passed as the tokenizer for the remaining input.
// config is not consulted.
//
// A failed tokenizer lookup is returned unchanged.
func TokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
	fallback, lookupErr := cache.TokenizerNamed(unicode.Name)
	if lookupErr != nil {
		return nil, lookupErr
	}

	tok := exception.NewExceptionsTokenizer(exceptionsRegexp, fallback)
	return tok, nil
}
作者:acgshar
项目:blev
// StopTokenFilterConstructor returns a stop tokens filter backed by the
// token map registered in cache under StopName. config is not consulted.
//
// A failed token map lookup is returned unchanged.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr != nil {
		return nil, lookupErr
	}

	filter := stop_tokens_filter.NewStopTokensFilter(stopWords)
	return filter, nil
}
作者:acgshar
项目:blev
// Constructor builds a highlighter from config.
//
// Recognised keys:
//   - "separator": optional string; DefaultSeparator is used when it is
//     absent or not a string.
//   - "fragmenter": required string naming a fragmenter in cache.
//   - "formatter": required string naming a fragment formatter in cache.
//
// The fragmenter is validated and resolved before the formatter is examined,
// so a fragmenter problem is reported first.
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.Highlighter, error) {
	sep := DefaultSeparator
	if custom, ok := config["separator"].(string); ok {
		sep = custom
	}

	fragName, ok := config["fragmenter"].(string)
	if !ok {
		return nil, fmt.Errorf("must specify fragmenter")
	}
	frag, err := cache.FragmenterNamed(fragName)
	if err != nil {
		return nil, fmt.Errorf("error building fragmenter: %v", err)
	}

	fmtName, ok := config["formatter"].(string)
	if !ok {
		return nil, fmt.Errorf("must specify formatter")
	}
	fragFormatter, err := cache.FragmentFormatterNamed(fmtName)
	if err != nil {
		return nil, fmt.Errorf("error building fragment formatter: %v", err)
	}

	return NewHighlighter(frag, fragFormatter, sep), nil
}
作者:acgshar
项目:blev
// ExceptionsTokenizerConstructor builds an exceptions tokenizer from config.
//
// The "exceptions" key supplies regular-expression patterns, either as a
// []interface{} (elements that are not strings are skipped) or as a
// []string. The patterns are joined with "|" into a single alternation and
// compiled. The "tokenizer" key names the tokenizer, looked up in cache, that
// is passed to NewExceptionsTokenizer together with the compiled expression.
//
// An error is returned when no pattern is supplied, when the combined
// pattern does not compile, when "tokenizer" is missing or not a string, or
// when the named tokenizer cannot be obtained from cache.
func ExceptionsTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
	var patterns []string
	// The value may arrive as a generic slice or as a typed string slice;
	// the two shapes are mutually exclusive.
	switch raw := config["exceptions"].(type) {
	case []interface{}:
		for _, item := range raw {
			if pattern, ok := item.(string); ok {
				patterns = append(patterns, pattern)
			}
		}
	case []string:
		patterns = append(patterns, raw...)
	}
	if len(patterns) == 0 {
		// The message names the key that is actually read ("exceptions").
		return nil, fmt.Errorf("no pattern found in 'exceptions' property")
	}

	r, err := regexp.Compile(strings.Join(patterns, "|"))
	if err != nil {
		return nil, fmt.Errorf("unable to build regexp tokenizer: %v", err)
	}

	remainingName, ok := config["tokenizer"].(string)
	if !ok {
		return nil, fmt.Errorf("must specify tokenizer for remaining input")
	}
	remaining, err := cache.TokenizerNamed(remainingName)
	if err != nil {
		return nil, err
	}

	return NewExceptionsTokenizer(r, remaining), nil
}
作者:acgshar
项目:blev
// AnalyzerConstructor returns an analyzer whose only component is the
// tokenizer registered in cache under single_token.Name; no token filters
// are set. config is not consulted.
//
// A failed tokenizer lookup is returned unchanged.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tok, lookupErr := cache.TokenizerNamed(single_token.Name)
	if lookupErr != nil {
		return nil, lookupErr
	}
	return &analysis.Analyzer{Tokenizer: tok}, nil
}
作者:acgshar
项目:blev
// StopTokensFilterConstructor builds a stop tokens filter from config.
//
// The "stop_token_map" key is required and must be a string naming a token
// map in cache. An error is returned when the key is missing or not a
// string, or when the token map lookup fails.
func StopTokensFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	mapName, present := config["stop_token_map"].(string)
	if !present {
		return nil, fmt.Errorf("must specify stop_token_map")
	}

	stopWords, lookupErr := cache.TokenMapNamed(mapName)
	if lookupErr != nil {
		return nil, fmt.Errorf("error building stop words filter: %v", lookupErr)
	}

	return NewStopTokensFilter(stopWords), nil
}
作者:acgshar
项目:blev
// KeyWordMarkerFilterConstructor builds a keyword marker filter from config.
//
// The "keywords_token_map" key is required and must be a string naming a
// token map in cache. An error is returned when the key is missing or not a
// string, or when the token map lookup fails.
func KeyWordMarkerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	mapName, present := config["keywords_token_map"].(string)
	if !present {
		return nil, fmt.Errorf("must specify keywords_token_map")
	}

	keywords, lookupErr := cache.TokenMapNamed(mapName)
	if lookupErr != nil {
		return nil, fmt.Errorf("error building keyword marker filter: %v", lookupErr)
	}

	return NewKeyWordMarkerFilter(keywords), nil
}
作者:acgshar
项目:blev
// ElisionFilterConstructor builds an elision filter from config.
//
// The "articles_token_map" key is required and must be a string naming a
// token map in cache. An error is returned when the key is missing or not a
// string, or when the token map lookup fails.
func ElisionFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	mapName, present := config["articles_token_map"].(string)
	if !present {
		return nil, fmt.Errorf("must specify articles_token_map")
	}

	articles, lookupErr := cache.TokenMapNamed(mapName)
	if lookupErr != nil {
		return nil, fmt.Errorf("error building elision filter: %v", lookupErr)
	}

	return NewElisionFilter(articles), nil
}
作者:acgshar
项目:blev
// getTokenFilters resolves each name in tokenFilterNames through cache and
// returns the token filters in the same order as the names.
//
// Resolution stops at the first failing lookup; that error is returned
// unchanged together with a nil slice.
func getTokenFilters(tokenFilterNames []string, cache *registry.Cache) ([]analysis.TokenFilter, error) {
	resolved := make([]analysis.TokenFilter, 0, len(tokenFilterNames))
	for _, name := range tokenFilterNames {
		filter, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		resolved = append(resolved, filter)
	}
	return resolved, nil
}
作者:acgshar
项目:blev
// getCharFilters resolves each name in charFilterNames through cache and
// returns the char filters in the same order as the names.
//
// Resolution stops at the first failing lookup; that error is returned
// unchanged together with a nil slice.
func getCharFilters(charFilterNames []string, cache *registry.Cache) ([]analysis.CharFilter, error) {
	resolved := make([]analysis.CharFilter, 0, len(charFilterNames))
	for _, name := range charFilterNames {
		filter, err := cache.CharFilterNamed(name)
		if err != nil {
			return nil, err
		}
		resolved = append(resolved, filter)
	}
	return resolved, nil
}
作者:acgshar
项目:blev
// AnalyzerConstructor returns an analyzer that uses the tokenizer registered
// in cache under TokenizerName followed by a single unicode normalization
// filter created for unicode_normalize.NFKD. config is not consulted.
//
// A failed tokenizer lookup is returned unchanged.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tok, lookupErr := cache.TokenizerNamed(TokenizerName)
	if lookupErr != nil {
		return nil, lookupErr
	}

	// The normalization filter is built directly rather than fetched from
	// the cache.
	nfkd := unicode_normalize.MustNewUnicodeNormalizeFilter(unicode_normalize.NFKD)

	return &analysis.Analyzer{
		Tokenizer:    tok,
		TokenFilters: []analysis.TokenFilter{nfkd},
	}, nil
}
作者:acgshar
项目:blev
// Constructor returns a simple highlighter wired with the fragmenter
// registered under simple_fragmenter.Name, the fragment formatter registered
// under html_formatter.Name, and simple_highlighter.DefaultSeparator. config
// is not consulted.
//
// The fragmenter is resolved first; a failure of either lookup is wrapped
// with a message identifying which component could not be built.
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.Highlighter, error) {
	frag, err := cache.FragmenterNamed(simple_fragmenter.Name)
	if err != nil {
		return nil, fmt.Errorf("error building fragmenter: %v", err)
	}

	htmlFormatter, err := cache.FragmentFormatterNamed(html_formatter.Name)
	if err != nil {
		return nil, fmt.Errorf("error building fragment formatter: %v", err)
	}

	hl := simple_highlighter.NewHighlighter(frag, htmlFormatter, simple_highlighter.DefaultSeparator)
	return hl, nil
}
作者:acgshar
项目:blev
// validate checks that everything this mapping refers to by name can be
// resolved through cache and that every field has a recognised type.
//
// Checks run in this order, stopping at the first failure:
//  1. dm.DefaultAnalyzer, when non-empty, must resolve via AnalyzerNamed.
//  2. Each entry of dm.Properties must pass its own validate.
//  3. For each entry of dm.Fields: a non-empty Analyzer must resolve via
//     AnalyzerNamed, a non-empty DateFormat must resolve via
//     DateTimeParserNamed, and Type must be one of "text", "datetime",
//     "number" or "boolean".
//
// Lookup and nested validation errors are returned unchanged; an
// unrecognised type yields an "unknown field type" error.
func (dm *DocumentMapping) validate(cache *registry.Cache) error {
	if dm.DefaultAnalyzer != "" {
		if _, err := cache.AnalyzerNamed(dm.DefaultAnalyzer); err != nil {
			return err
		}
	}

	for _, child := range dm.Properties {
		if err := child.validate(cache); err != nil {
			return err
		}
	}

	for _, fm := range dm.Fields {
		if fm.Analyzer != "" {
			if _, err := cache.AnalyzerNamed(fm.Analyzer); err != nil {
				return err
			}
		}
		if fm.DateFormat != "" {
			if _, err := cache.DateTimeParserNamed(fm.DateFormat); err != nil {
				return err
			}
		}
		switch fm.Type {
		case "text", "datetime", "number", "boolean":
			// Recognised type; nothing further to check.
		default:
			return fmt.Errorf("unknown field type: '%s'", fm.Type)
		}
	}

	return nil
}
作者:acgshar
项目:blev
// AnalyzerConstructor assembles an analyzer that uses the tokenizer
// registered under webt.Name followed by the token filters registered under
// lower_case_filter.Name and en.StopName, in that order. config is not
// consulted.
//
// The first lookup failure reported by cache is returned unchanged.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tok, err := cache.TokenizerNamed(webt.Name)
	if err != nil {
		return nil, err
	}

	// Lookup order and pipeline order are the same, so a single table
	// drives both.
	filterNames := []string{lower_case_filter.Name, en.StopName}
	filters := make([]analysis.TokenFilter, 0, len(filterNames))
	for _, name := range filterNames {
		filter, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		filters = append(filters, filter)
	}

	return &analysis.Analyzer{
		Tokenizer:    tok,
		TokenFilters: filters,
	}, nil
}