chewy: Add Chinese search optimization

This commit is contained in:
bgme 2026-01-30 23:33:00 +08:00
parent db943c43c8
commit 9e67b74d66
4 changed files with 100 additions and 6 deletions

View file

@ -25,12 +25,26 @@ class AccountsIndex < Chewy::Index
output_unigrams: true, output_unigrams: true,
token_separator: '', token_separator: '',
}, },
# Token filter of the `stconvert` type, configured with convert_type "t2s"
# (presumably Traditional -> Simplified Chinese; confirm against the plugin docs).
# NOTE(review): no analyzer visible in this diff lists `tsconvert` under `filter:`;
# the analyzers reference the same-named char_filter instead. Confirm whether this
# token-filter definition is actually needed.
tsconvert: {
type: "stconvert",
# NOTE(review): the delimiter presumably only matters when keep_both is true — confirm.
delimiter: "#",
keep_both: false,
convert_type: "t2s"
}
},
char_filter: {
# Char filter of the `stconvert` type. The `natural` and `verbatim` analyzers in
# this diff opt in via `char_filter: %w(tsconvert)`.
# convert_type 't2s' presumably means Traditional -> Simplified Chinese — confirm
# against the plugin docs.
tsconvert: {
type: 'stconvert',
convert_type: 't2s',
},
}, },
analyzer: { analyzer: {
# "The FOOING's bar" becomes "foo bar" # "The FOOING's bar" becomes "foo bar"
natural: { natural: {
tokenizer: 'standard', tokenizer: 'ik_max_word',
filter: %w( filter: %w(
lowercase lowercase
asciifolding asciifolding
@ -40,12 +54,14 @@ class AccountsIndex < Chewy::Index
english_stop english_stop
english_stemmer english_stemmer
), ),
char_filter: %w(tsconvert),
}, },
# "FOO bar" becomes "foo bar" # "FOO bar" becomes "foo bar"
verbatim: { verbatim: {
tokenizer: 'standard', tokenizer: 'ik_max_word',
filter: %w(lowercase asciifolding cjk_width), filter: %w(lowercase asciifolding cjk_width),
char_filter: %w(tsconvert),
}, },
# "Foo bar" becomes "foo bar foobar" # "Foo bar" becomes "foo bar foobar"

View file

@ -19,6 +19,20 @@ class PublicStatusesIndex < Chewy::Index
type: 'stemmer', type: 'stemmer',
language: 'possessive_english', language: 'possessive_english',
}, },
# Token filter of the `stconvert` type, configured with convert_type "t2s"
# (presumably Traditional -> Simplified Chinese; confirm against the plugin docs).
# NOTE(review): none of the analyzer `filter:` lists visible in this diff
# (e.g. `content_search`) include `tsconvert`; only the same-named char_filter is
# referenced. Confirm whether this token-filter definition is actually needed.
tsconvert: {
type: "stconvert",
# NOTE(review): the delimiter presumably only matters when keep_both is true — confirm.
delimiter: "#",
keep_both: false,
convert_type: "t2s"
}
},
char_filter: {
# Char filter of the `stconvert` type. The `content`, `content_search` and
# `hashtag` analyzers in this diff opt in via `char_filter: %w(tsconvert)`.
# convert_type 't2s' presumably means Traditional -> Simplified Chinese — confirm
# against the plugin docs.
tsconvert: {
type: 'stconvert',
convert_type: 't2s',
},
}, },
analyzer: { analyzer: {
@ -28,7 +42,7 @@ class PublicStatusesIndex < Chewy::Index
}, },
content: { content: {
tokenizer: 'standard', tokenizer: 'ik_max_word',
filter: %w( filter: %w(
lowercase lowercase
asciifolding asciifolding
@ -38,6 +52,21 @@ class PublicStatusesIndex < Chewy::Index
english_stop english_stop
english_stemmer english_stemmer
), ),
char_filter: %w(tsconvert),
},
content_search: {
tokenizer: 'ik_smart',
filter: %w(
lowercase
asciifolding
cjk_width
elision
english_possessive_stemmer
english_stop
english_stemmer
),
char_filter: %w(tsconvert),
}, },
hashtag: { hashtag: {
@ -48,6 +77,7 @@ class PublicStatusesIndex < Chewy::Index
asciifolding asciifolding
cjk_width cjk_width
), ),
char_filter: %w(tsconvert),
}, },
}, },
} }
@ -60,7 +90,7 @@ class PublicStatusesIndex < Chewy::Index
root date_detection: false do root date_detection: false do
field(:id, type: 'long') field(:id, type: 'long')
field(:account_id, type: 'long') field(:account_id, type: 'long')
field(:text, type: 'text', analyzer: 'verbatim', value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: 'content') } field(:text, type: 'text', analyzer: 'verbatim', value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: 'content', search_analyzer: 'content_search') }
field(:tags, type: 'text', analyzer: 'hashtag', value: ->(status) { status.tags.map(&:display_name) }) field(:tags, type: 'text', analyzer: 'hashtag', value: ->(status) { status.tags.map(&:display_name) })
field(:language, type: 'keyword') field(:language, type: 'keyword')
field(:properties, type: 'keyword', value: ->(status) { status.searchable_properties }) field(:properties, type: 'keyword', value: ->(status) { status.searchable_properties })

View file

@ -19,6 +19,20 @@ class StatusesIndex < Chewy::Index
type: 'stemmer', type: 'stemmer',
language: 'possessive_english', language: 'possessive_english',
}, },
# Token filter of the `stconvert` type, configured with convert_type "t2s"
# (presumably Traditional -> Simplified Chinese; confirm against the plugin docs).
# NOTE(review): none of the analyzer `filter:` lists visible in this diff
# (e.g. `content_search`) include `tsconvert`; only the same-named char_filter is
# referenced. Confirm whether this token-filter definition is actually needed.
tsconvert: {
type: "stconvert",
# NOTE(review): the delimiter presumably only matters when keep_both is true — confirm.
delimiter: "#",
keep_both: false,
convert_type: "t2s"
}
},
char_filter: {
# Char filter of the `stconvert` type. The `content`, `content_search` and
# `hashtag` analyzers in this diff opt in via `char_filter: %w(tsconvert)`.
# convert_type 't2s' presumably means Traditional -> Simplified Chinese — confirm
# against the plugin docs.
tsconvert: {
type: 'stconvert',
convert_type: 't2s',
},
}, },
analyzer: { analyzer: {
@ -28,7 +42,7 @@ class StatusesIndex < Chewy::Index
}, },
content: { content: {
tokenizer: 'standard', tokenizer: 'ik_max_word',
filter: %w( filter: %w(
lowercase lowercase
asciifolding asciifolding
@ -38,6 +52,21 @@ class StatusesIndex < Chewy::Index
english_stop english_stop
english_stemmer english_stemmer
), ),
char_filter: %w(tsconvert),
},
content_search: {
tokenizer: 'ik_smart',
filter: %w(
lowercase
asciifolding
cjk_width
elision
english_possessive_stemmer
english_stop
english_stemmer
),
char_filter: %w(tsconvert),
}, },
hashtag: { hashtag: {
@ -48,6 +77,7 @@ class StatusesIndex < Chewy::Index
asciifolding asciifolding
cjk_width cjk_width
), ),
char_filter: %w(tsconvert),
}, },
}, },
} }
@ -57,7 +87,7 @@ class StatusesIndex < Chewy::Index
root date_detection: false do root date_detection: false do
field(:id, type: 'long') field(:id, type: 'long')
field(:account_id, type: 'long') field(:account_id, type: 'long')
field(:text, type: 'text', analyzer: 'verbatim', value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: 'content') } field(:text, type: 'text', analyzer: 'verbatim', value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: 'content', search_analyzer: 'content_search') }
field(:tags, type: 'text', analyzer: 'hashtag', value: ->(status) { status.tags.map(&:display_name) }) field(:tags, type: 'text', analyzer: 'hashtag', value: ->(status) { status.tags.map(&:display_name) })
field(:searchable_by, type: 'long', value: ->(status) { status.searchable_by }) field(:searchable_by, type: 'long', value: ->(status) { status.searchable_by })
field(:language, type: 'keyword') field(:language, type: 'keyword')

View file

@ -4,6 +4,22 @@ class TagsIndex < Chewy::Index
include DatetimeClampingConcern include DatetimeClampingConcern
settings index: index_preset(refresh_interval: '30s'), analysis: { settings index: index_preset(refresh_interval: '30s'), analysis: {
# Custom token filters for the tags index.
# NOTE(review): the portions of the `content` and `edge_ngram` filter lists visible
# in this diff do not include `tsconvert`; both analyzers reference the same-named
# char_filter instead. Confirm whether this token-filter definition is needed.
filter: {
# `stconvert` token filter with convert_type "t2s" (presumably Traditional ->
# Simplified Chinese; confirm against the plugin docs).
tsconvert: {
type: "stconvert",
# NOTE(review): the delimiter presumably only matters when keep_both is true — confirm.
delimiter: "#",
keep_both: false,
convert_type: "t2s"
}
},
# Custom character filters for the tags index. The `content` and `edge_ngram`
# analyzers in this diff opt in via `char_filter: %w(tsconvert)`.
char_filter: {
# `stconvert` char filter; convert_type 't2s' presumably means Traditional ->
# Simplified Chinese — confirm against the plugin docs.
tsconvert: {
type: 'stconvert',
convert_type: 't2s',
},
},
analyzer: { analyzer: {
content: { content: {
tokenizer: 'keyword', tokenizer: 'keyword',
@ -13,6 +29,7 @@ class TagsIndex < Chewy::Index
asciifolding asciifolding
cjk_width cjk_width
), ),
char_filter: %w(tsconvert),
}, },
edge_ngram: { edge_ngram: {
@ -22,6 +39,7 @@ class TagsIndex < Chewy::Index
asciifolding asciifolding
cjk_width cjk_width
), ),
char_filter: %w(tsconvert),
}, },
}, },