From d9ae3db8d5543cf0b7fa44186c191c9bb2472d23 Mon Sep 17 00:00:00 2001 From: Claire Date: Wed, 5 May 2021 22:04:52 +0200 Subject: [PATCH] Improve performance of follow recommendation scheduler (#16159) Express follow_recommendations in terms of account_summaries rather than accounts, integrate filters that are unconditionally used, and materialize the resulting view. This should result in the bulk of the computation being performed only once instead of **once per recommendation language**. --- app/models/follow_recommendation.rb | 6 ++-- .../follow_recommendations_scheduler.rb | 5 +-- ...ate_follow_recommendations_to_version_2.rb | 18 ++++++++++ db/schema.rb | 28 ++++++++------- db/views/follow_recommendations_v02.sql | 34 +++++++++++++++++++ 5 files changed, 75 insertions(+), 16 deletions(-) create mode 100644 db/migrate/20210505174616_update_follow_recommendations_to_version_2.rb create mode 100644 db/views/follow_recommendations_v02.sql diff --git a/app/models/follow_recommendation.rb b/app/models/follow_recommendation.rb index 6670b6560..1ed6dc49b 100644 --- a/app/models/follow_recommendation.rb +++ b/app/models/follow_recommendation.rb @@ -14,9 +14,11 @@ class FollowRecommendation < ApplicationRecord belongs_to :account_summary, foreign_key: :account_id belongs_to :account, foreign_key: :account_id - scope :safe, -> { joins(:account_summary).merge(AccountSummary.safe) } scope :localized, ->(locale) { joins(:account_summary).merge(AccountSummary.localized(locale)) } - scope :filtered, -> { joins(:account_summary).merge(AccountSummary.filtered) } + + def self.refresh + Scenic.database.refresh_materialized_view(table_name, concurrently: true, cascade: false) + end def readonly? true diff --git a/app/workers/scheduler/follow_recommendations_scheduler.rb b/app/workers/scheduler/follow_recommendations_scheduler.rb index 0a0286496..cb1e15961 100644 --- a/app/workers/scheduler/follow_recommendations_scheduler.rb +++ b/app/workers/scheduler/follow_recommendations_scheduler.rb @@ -14,13 +14,14 @@ class Scheduler::FollowRecommendationsScheduler def perform # Maintaining a materialized view speeds-up subsequent queries significantly AccountSummary.refresh + FollowRecommendation.refresh - fallback_recommendations = FollowRecommendation.safe.filtered.limit(SET_SIZE).index_by(&:account_id) + fallback_recommendations = FollowRecommendation.limit(SET_SIZE).index_by(&:account_id) I18n.available_locales.each do |locale| recommendations = begin if AccountSummary.safe.filtered.localized(locale).exists? # We can skip the work if no accounts with that language exist - FollowRecommendation.safe.filtered.localized(locale).limit(SET_SIZE).index_by(&:account_id) + FollowRecommendation.localized(locale).limit(SET_SIZE).index_by(&:account_id) else {} end diff --git a/db/migrate/20210505174616_update_follow_recommendations_to_version_2.rb b/db/migrate/20210505174616_update_follow_recommendations_to_version_2.rb new file mode 100644 index 000000000..9b2a284e4 --- /dev/null +++ b/db/migrate/20210505174616_update_follow_recommendations_to_version_2.rb @@ -0,0 +1,18 @@ +class UpdateFollowRecommendationsToVersion2 < ActiveRecord::Migration[6.1] + # We're switching from a normal to a materialized view so we need + # custom `up` and `down` paths. + + def up + drop_view :follow_recommendations + create_view :follow_recommendations, version: 2, materialized: true + + # To be able to refresh the view concurrently, + # at least one unique index is required + safety_assured { add_index :follow_recommendations, :account_id, unique: true } + end + + def down + drop_view :follow_recommendations, materialized: true + create_view :follow_recommendations, version: 1 + end +end diff --git a/db/schema.rb b/db/schema.rb index 0d951ee95..88e906079 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2021_04_25_135952) do +ActiveRecord::Schema.define(version: 2021_05_05_174616) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -1114,30 +1114,34 @@ ActiveRecord::Schema.define(version: 2021_04_25_135952) do SQL add_index "account_summaries", ["account_id"], name: "index_account_summaries_on_account_id", unique: true - create_view "follow_recommendations", sql_definition: <<-SQL + create_view "follow_recommendations", materialized: true, sql_definition: <<-SQL SELECT t0.account_id, sum(t0.rank) AS rank, array_agg(t0.reason) AS reason - FROM ( SELECT accounts.id AS account_id, + FROM ( SELECT account_summaries.account_id, ((count(follows.id))::numeric / (1.0 + (count(follows.id))::numeric)) AS rank, 'most_followed'::text AS reason - FROM ((follows - JOIN accounts ON ((accounts.id = follows.target_account_id))) + FROM (((follows + JOIN account_summaries ON ((account_summaries.account_id = follows.target_account_id))) JOIN users ON ((users.account_id = follows.account_id))) - WHERE ((users.current_sign_in_at >= (now() - 'P30D'::interval)) AND (accounts.suspended_at IS NULL) AND (accounts.moved_to_account_id IS NULL) AND (accounts.silenced_at IS NULL) AND (accounts.locked = false) AND (accounts.discoverable = true)) - GROUP BY accounts.id + LEFT JOIN follow_recommendation_suppressions ON ((follow_recommendation_suppressions.account_id = follows.target_account_id))) + WHERE ((users.current_sign_in_at >= (now() - 'P30D'::interval)) AND (account_summaries.sensitive = false) AND (follow_recommendation_suppressions.id IS NULL)) + GROUP BY account_summaries.account_id HAVING (count(follows.id) >= 5) UNION ALL - SELECT accounts.id AS account_id, + SELECT account_summaries.account_id, (sum((status_stats.reblogs_count + status_stats.favourites_count)) / (1.0 + sum((status_stats.reblogs_count + status_stats.favourites_count)))) AS rank, 'most_interactions'::text AS reason - FROM ((status_stats + FROM (((status_stats JOIN statuses ON ((statuses.id = status_stats.status_id))) - JOIN accounts ON ((accounts.id = statuses.account_id))) - WHERE ((statuses.id >= (((date_part('epoch'::text, (now() - 'P30D'::interval)) * (1000)::double precision))::bigint << 16)) AND (accounts.suspended_at IS NULL) AND (accounts.moved_to_account_id IS NULL) AND (accounts.silenced_at IS NULL) AND (accounts.locked = false) AND (accounts.discoverable = true)) - GROUP BY accounts.id + JOIN account_summaries ON ((account_summaries.account_id = statuses.account_id))) + LEFT JOIN follow_recommendation_suppressions ON ((follow_recommendation_suppressions.account_id = statuses.account_id))) + WHERE ((statuses.id >= (((date_part('epoch'::text, (now() - 'P30D'::interval)) * (1000)::double precision))::bigint << 16)) AND (account_summaries.sensitive = false) AND (follow_recommendation_suppressions.id IS NULL)) + GROUP BY account_summaries.account_id HAVING (sum((status_stats.reblogs_count + status_stats.favourites_count)) >= (5)::numeric)) t0 GROUP BY t0.account_id ORDER BY (sum(t0.rank)) DESC; SQL + add_index "follow_recommendations", ["account_id"], name: "index_follow_recommendations_on_account_id", unique: true + end diff --git a/db/views/follow_recommendations_v02.sql b/db/views/follow_recommendations_v02.sql new file mode 100644 index 000000000..673c5cc85 --- /dev/null +++ b/db/views/follow_recommendations_v02.sql @@ -0,0 +1,34 @@ +SELECT + account_id, + sum(rank) AS rank, + array_agg(reason) AS reason +FROM ( + SELECT + account_summaries.account_id AS account_id, + count(follows.id) / (1.0 + count(follows.id)) AS rank, + 'most_followed' AS reason + FROM follows + INNER JOIN account_summaries ON account_summaries.account_id = follows.target_account_id + INNER JOIN users ON users.account_id = follows.account_id + LEFT OUTER JOIN follow_recommendation_suppressions ON follow_recommendation_suppressions.account_id = follows.target_account_id + WHERE users.current_sign_in_at >= (now() - interval '30 days') + AND account_summaries.sensitive = 'f' + AND follow_recommendation_suppressions.id IS NULL + GROUP BY account_summaries.account_id + HAVING count(follows.id) >= 5 + UNION ALL + SELECT account_summaries.account_id AS account_id, + sum(reblogs_count + favourites_count) / (1.0 + sum(reblogs_count + favourites_count)) AS rank, + 'most_interactions' AS reason + FROM status_stats + INNER JOIN statuses ON statuses.id = status_stats.status_id + INNER JOIN account_summaries ON account_summaries.account_id = statuses.account_id + LEFT OUTER JOIN follow_recommendation_suppressions ON follow_recommendation_suppressions.account_id = statuses.account_id + WHERE statuses.id >= ((date_part('epoch', now() - interval '30 days') * 1000)::bigint << 16) + AND account_summaries.sensitive = 'f' + AND follow_recommendation_suppressions.id IS NULL + GROUP BY account_summaries.account_id + HAVING sum(reblogs_count + favourites_count) >= 5 +) t0 +GROUP BY account_id +ORDER BY rank DESC