From 19b4c666f7f760986be4215ea874f951c617a0df Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Thu, 31 May 2018 17:09:09 +0200 Subject: [PATCH] Improve account index migration (#7684) * Improve account index migration - Display more progress in stdout - Catch PG::UniqueViolation when re-attributing favourites - Skip callbacks and validations when re-attributing other relationships * Use in_batches to reduce table lock-up during account merge * Use #say_with_time to benchmark each deduplication --- ...0180528141303_fix_accounts_unique_index.rb | 42 ++++++++++--------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/db/migrate/20180528141303_fix_accounts_unique_index.rb b/db/migrate/20180528141303_fix_accounts_unique_index.rb index aadb5b7db..e949436dc 100644 --- a/db/migrate/20180528141303_fix_accounts_unique_index.rb +++ b/db/migrate/20180528141303_fix_accounts_unique_index.rb @@ -39,26 +39,28 @@ class FixAccountsUniqueIndex < ActiveRecord::Migration[5.2] accounts = accounts.first.local? ? accounts.sort_by(&:created_at) : accounts.sort_by(&:updated_at).reverse reference_account = accounts.shift - accounts.each do |other_account| - if other_account.public_key == reference_account.public_key - # The accounts definitely point to the same resource, so - # it's safe to re-attribute content and relationships - merge_accounts!(reference_account, other_account) - elsif other_account.local? - # Since domain is in the GROUP BY clause, both accounts - # are always either going to be local or not local, so only - # one check is needed. Since we cannot support two users with - # the same username locally, one has to go. 😢 - other_account.user&.destroy - end + say_with_time "Deduplicating @#{reference_account.acct} (#{accounts.size} duplicates)..." do + accounts.each do |other_account| + if other_account.public_key == reference_account.public_key + # The accounts definitely point to the same resource, so + # it's safe to re-attribute content and relationships + merge_accounts!(reference_account, other_account) + elsif other_account.local? + # Since domain is in the GROUP BY clause, both accounts + # are always either going to be local or not local, so only + # one check is needed. Since we cannot support two users with + # the same username locally, one has to go. 😢 + other_account.user&.destroy + end - other_account.destroy + other_account.destroy + end end end def merge_accounts!(main_account, duplicate_account) - [Status, Favourite, Mention, StatusPin, StreamEntry].each do |klass| - klass.where(account_id: duplicate_account.id).update_all(account_id: main_account.id) + [Status, Mention, StatusPin, StreamEntry].each do |klass| + klass.where(account_id: duplicate_account.id).in_batches.update_all(account_id: main_account.id) end # Since it's the same remote resource, the remote resource likely @@ -67,19 +69,19 @@ class FixAccountsUniqueIndex < ActiveRecord::Migration[5.2] # of the index bug users could have *also* followed the reference # account already, therefore mass update will not work and we need # to check for (and skip past) uniqueness errors - [Follow, FollowRequest, Block, Mute].each do |klass| + [Favourite, Follow, FollowRequest, Block, Mute].each do |klass| klass.where(account_id: duplicate_account.id).find_each do |record| begin - record.update(account_id: main_account.id) - rescue ActiveRecord::RecordNotUnique + record.update_attribute(:account_id, main_account.id) + rescue PG::UniqueViolation next end end klass.where(target_account_id: duplicate_account.id).find_each do |record| begin - record.update(target_account_id: main_account.id) - rescue ActiveRecord::RecordNotUnique + record.update_attribute(:target_account_id, main_account.id) + rescue PG::UniqueViolation next end end