Add Chinese-aware analysis to the Chewy search indexes.

* Analyzers that used the `standard` tokenizer now use the IK tokenizers:
  `ik_max_word` for `natural`, `verbatim` (accounts) and `content` (statuses),
  plus a new `content_search` analyzer (`ik_smart`) that is wired in as the
  `search_analyzer` of the `text.stemmed` field.
* Every analyzer touched by this patch runs the `tsconvert` char filter
  (type `stconvert`, `convert_type: 't2s'`) before tokenizing.
* Only the char filter is defined; no analyzer references a `tsconvert`
  token filter, so none is declared.

NOTE(review): `ik_*` and `stconvert` are not built-in Elasticsearch components;
this presumably requires the analysis-ik and analysis-stconvert plugins on the
cluster -- confirm before deploying.

diff --git a/app/chewy/accounts_index.rb b/app/chewy/accounts_index.rb
index 796584e9c..03cfab8c5 100644
--- a/app/chewy/accounts_index.rb
+++ b/app/chewy/accounts_index.rb
@@ -25,12 +25,20 @@ class AccountsIndex < Chewy::Index
         output_unigrams: true,
         token_separator: '',
       },
     },
+
+    # Folds Traditional Chinese into Simplified before tokenizing (stconvert, t2s)
+    char_filter: {
+      tsconvert: {
+        type: 'stconvert',
+        convert_type: 't2s',
+      },
+    },
 
     analyzer: {
       # "The FOOING's bar" becomes "foo bar"
       natural: {
-        tokenizer: 'standard',
+        tokenizer: 'ik_max_word',
         filter: %w(
           lowercase
           asciifolding
@@ -40,12 +48,14 @@ class AccountsIndex < Chewy::Index
           english_stop
           english_stemmer
         ),
+        char_filter: %w(tsconvert),
       },
 
       # "FOO bar" becomes "foo bar"
       verbatim: {
-        tokenizer: 'standard',
+        tokenizer: 'ik_max_word',
         filter: %w(lowercase asciifolding cjk_width),
+        char_filter: %w(tsconvert),
       },
 
       # "Foo bar" becomes "foo bar foobar"
diff --git a/app/chewy/public_statuses_index.rb b/app/chewy/public_statuses_index.rb
index 09a4dfc09..a9a8f1769 100644
--- a/app/chewy/public_statuses_index.rb
+++ b/app/chewy/public_statuses_index.rb
@@ -19,6 +19,14 @@ class PublicStatusesIndex < Chewy::Index
         type: 'stemmer',
         language: 'possessive_english',
       },
     },
+
+    # Folds Traditional Chinese into Simplified before tokenizing (stconvert, t2s)
+    char_filter: {
+      tsconvert: {
+        type: 'stconvert',
+        convert_type: 't2s',
+      },
+    },
 
     analyzer: {
@@ -28,7 +36,7 @@ class PublicStatusesIndex < Chewy::Index
       },
 
       content: {
-        tokenizer: 'standard',
+        tokenizer: 'ik_max_word',
         filter: %w(
           lowercase
           asciifolding
@@ -38,6 +46,22 @@ class PublicStatusesIndex < Chewy::Index
           english_stop
           english_stemmer
         ),
+        char_filter: %w(tsconvert),
+      },
+
+      # Query-time counterpart of `content`: same filters, `ik_smart` tokenizer
+      content_search: {
+        tokenizer: 'ik_smart',
+        filter: %w(
+          lowercase
+          asciifolding
+          cjk_width
+          elision
+          english_possessive_stemmer
+          english_stop
+          english_stemmer
+        ),
+        char_filter: %w(tsconvert),
       },
 
       hashtag: {
@@ -48,6 +72,7 @@ class PublicStatusesIndex < Chewy::Index
           asciifolding
           cjk_width
         ),
+        char_filter: %w(tsconvert),
       },
     },
   }
@@ -60,7 +85,7 @@ class PublicStatusesIndex < Chewy::Index
   root date_detection: false do
     field(:id, type: 'long')
     field(:account_id, type: 'long')
-    field(:text, type: 'text', analyzer: 'verbatim', value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: 'content') }
+    field(:text, type: 'text', analyzer: 'verbatim', value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: 'content', search_analyzer: 'content_search') }
     field(:tags, type: 'text', analyzer: 'hashtag', value: ->(status) { status.tags.map(&:display_name) })
     field(:language, type: 'keyword')
     field(:properties, type: 'keyword', value: ->(status) { status.searchable_properties })
diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb
index e739ccecb..1381809ac 100644
--- a/app/chewy/statuses_index.rb
+++ b/app/chewy/statuses_index.rb
@@ -19,6 +19,14 @@ class StatusesIndex < Chewy::Index
         type: 'stemmer',
         language: 'possessive_english',
       },
     },
+
+    # Folds Traditional Chinese into Simplified before tokenizing (stconvert, t2s)
+    char_filter: {
+      tsconvert: {
+        type: 'stconvert',
+        convert_type: 't2s',
+      },
+    },
 
     analyzer: {
@@ -28,7 +36,7 @@ class StatusesIndex < Chewy::Index
       },
 
       content: {
-        tokenizer: 'standard',
+        tokenizer: 'ik_max_word',
         filter: %w(
           lowercase
           asciifolding
@@ -38,6 +46,22 @@ class StatusesIndex < Chewy::Index
           english_stop
           english_stemmer
         ),
+        char_filter: %w(tsconvert),
+      },
+
+      # Query-time counterpart of `content`: same filters, `ik_smart` tokenizer
+      content_search: {
+        tokenizer: 'ik_smart',
+        filter: %w(
+          lowercase
+          asciifolding
+          cjk_width
+          elision
+          english_possessive_stemmer
+          english_stop
+          english_stemmer
+        ),
+        char_filter: %w(tsconvert),
       },
 
       hashtag: {
@@ -48,6 +72,7 @@ class StatusesIndex < Chewy::Index
           asciifolding
           cjk_width
         ),
+        char_filter: %w(tsconvert),
       },
     },
   }
@@ -57,7 +82,7 @@ class StatusesIndex < Chewy::Index
   root date_detection: false do
     field(:id, type: 'long')
     field(:account_id, type: 'long')
-    field(:text, type: 'text', analyzer: 'verbatim', value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: 'content') }
+    field(:text, type: 'text', analyzer: 'verbatim', value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: 'content', search_analyzer: 'content_search') }
     field(:tags, type: 'text', analyzer: 'hashtag', value: ->(status) { status.tags.map(&:display_name) })
     field(:searchable_by, type: 'long', value: ->(status) { status.searchable_by })
     field(:language, type: 'keyword')
diff --git a/app/chewy/tags_index.rb b/app/chewy/tags_index.rb
index c99218a47..3e359d8c1 100644
--- a/app/chewy/tags_index.rb
+++ b/app/chewy/tags_index.rb
@@ -4,6 +4,14 @@ class TagsIndex < Chewy::Index
   include DatetimeClampingConcern
 
   settings index: index_preset(refresh_interval: '30s'), analysis: {
+    # Folds Traditional Chinese into Simplified before tokenizing (stconvert, t2s)
+    char_filter: {
+      tsconvert: {
+        type: 'stconvert',
+        convert_type: 't2s',
+      },
+    },
+
     analyzer: {
       content: {
         tokenizer: 'keyword',
@@ -13,6 +21,7 @@ class TagsIndex < Chewy::Index
           asciifolding
           cjk_width
         ),
+        char_filter: %w(tsconvert),
       },
 
       edge_ngram: {
@@ -22,6 +31,7 @@ class TagsIndex < Chewy::Index
           asciifolding
           cjk_width
         ),
+        char_filter: %w(tsconvert),
       },
     },
 