From 7f0c49c58ab75bbd6b4dc44d47a8c4ab57db9d51 Mon Sep 17 00:00:00 2001 From: abcang Date: Sun, 25 Apr 2021 13:33:28 +0900 Subject: [PATCH] Improve tag search query (#16104) --- app/models/tag.rb | 14 ++++++-------- ...431_add_case_insensitive_btree_index_to_tags.rb | 13 +++++++++++++ db/schema.rb | 4 ++-- spec/models/tag_spec.rb | 14 ++++++++++++++ 4 files changed, 35 insertions(+), 10 deletions(-) create mode 100644 db/migrate/20210421121431_add_case_insensitive_btree_index_to_tags.rb diff --git a/app/models/tag.rb b/app/models/tag.rb index bb93a52e2e..efffc7eee4 100644 --- a/app/models/tag.rb +++ b/app/models/tag.rb @@ -40,7 +40,8 @@ class Tag < ApplicationRecord scope :trendable, -> { Setting.trendable_by_default ? where(trendable: [true, nil]) : where(trendable: true) } scope :discoverable, -> { listable.joins(:account_tag_stat).where(AccountTagStat.arel_table[:accounts_count].gt(0)).order(Arel.sql('account_tag_stats.accounts_count desc')) } scope :recently_used, ->(account) { joins(:statuses).where(statuses: { id: account.statuses.select(:id).limit(1000) }).group(:id).order(Arel.sql('count(*) desc')) } - scope :matches_name, ->(value) { where(arel_table[:name].matches("#{value}%")) } + # Match case-sensitively (LIKE) on LOWER(name) so the B-tree index on lower(name) can be used.
+ scope :matches_name, ->(term) { where(arel_table[:name].lower.matches(arel_table.lower("#{sanitize_sql_like(Tag.normalize(term))}%"), nil, true)) } delegate :accounts_count, :accounts_count=, @@ -126,10 +127,9 @@ class Tag < ApplicationRecord end def search_for(term, limit = 5, offset = 0, options = {}) - normalized_term = normalize(term.strip) - pattern = sanitize_sql_like(normalized_term) + '%' - query = Tag.listable.where(arel_table[:name].lower.matches(pattern)) - query = query.where(arel_table[:name].lower.eq(normalized_term).or(arel_table[:reviewed_at].not_eq(nil))) if options[:exclude_unreviewed] + stripped_term = term.strip + query = Tag.listable.matches_name(stripped_term) + query = query.merge(matching_name(stripped_term).or(where.not(reviewed_at: nil))) if options[:exclude_unreviewed] query.order(Arel.sql('length(name) ASC, name ASC')) .limit(limit) @@ -145,7 +145,7 @@ class Tag < ApplicationRecord end def matching_name(name_or_names) - names = Array(name_or_names).map { |name| normalize(name).mb_chars.downcase.to_s } + names = Array(name_or_names).map { |name| arel_table.lower(normalize(name)) } if names.size == 1 where(arel_table[:name].lower.eq(names.first)) @@ -154,8 +154,6 @@ class Tag < ApplicationRecord end end - private - def normalize(str) str.gsub(/\A#/, '') end diff --git a/db/migrate/20210421121431_add_case_insensitive_btree_index_to_tags.rb b/db/migrate/20210421121431_add_case_insensitive_btree_index_to_tags.rb new file mode 100644 index 0000000000..ed359e8cd9 --- /dev/null +++ b/db/migrate/20210421121431_add_case_insensitive_btree_index_to_tags.rb @@ -0,0 +1,13 @@ +class AddCaseInsensitiveBtreeIndexToTags < ActiveRecord::Migration[5.2] + disable_ddl_transaction!
+ + def up + safety_assured { execute 'CREATE UNIQUE INDEX CONCURRENTLY index_tags_on_name_lower_btree ON tags (lower(name) text_pattern_ops)' } + remove_index :tags, name: 'index_tags_on_name_lower' + end + + def down + safety_assured { execute 'CREATE UNIQUE INDEX CONCURRENTLY index_tags_on_name_lower ON tags (lower(name))' } + remove_index :tags, name: 'index_tags_on_name_lower_btree' + end +end diff --git a/db/schema.rb b/db/schema.rb index dcbbf4aea9..8dc0661fc9 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2021_04_16_200740) do +ActiveRecord::Schema.define(version: 2021_04_21_121431) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -862,7 +862,7 @@ ActiveRecord::Schema.define(version: 2021_04_16_200740) do t.datetime "last_status_at" t.float "max_score" t.datetime "max_score_at" - t.index "lower((name)::text)", name: "index_tags_on_name_lower", unique: true + t.index "lower((name)::text) text_pattern_ops", name: "index_tags_on_name_lower_btree", unique: true end create_table "tombstones", force: :cascade do |t| diff --git a/spec/models/tag_spec.rb b/spec/models/tag_spec.rb index df876593c9..3949dbce54 100644 --- a/spec/models/tag_spec.rb +++ b/spec/models/tag_spec.rb @@ -96,6 +96,20 @@ RSpec.describe Tag, type: :model do end end + describe '.matches_name' do + it 'returns tags for multibyte case-insensitive names' do + upcase_string = 'abcABCabcABCやゆよ' + downcase_string = 'abcabcabcabcやゆよ' + + tag = Fabricate(:tag, name: downcase_string) + expect(Tag.matches_name(upcase_string)).to eq [tag] + end + + it 'uses the LIKE operator' do + expect(Tag.matches_name('100%abc').to_sql).to eq %q[SELECT "tags".* FROM "tags" WHERE LOWER("tags"."name") LIKE LOWER('100\\%abc%')] + end + end + describe '.matching_name' do it 'returns tags for multibyte case-insensitive
names' do upcase_string = 'abcABCabcABCやゆよ'