* Fetch up to 5 replies when discovering a new remote status This is used for resolving threads downwards. The originating server must add a “replies” attributes with such replies for it to be useful. * Add some tests for ActivityPub::FetchRepliesWorker * Add specs for ActivityPub::FetchRepliesService * Serialize up to 5 public self-replies for ActivityPub notes * Add specs for ActivityPub::NoteSerializer * Move exponential backoff logic to a worker concern * Fetch first page of paginated collections when fetching thread replies * Add specs for paginated collections in replies * Move Note replies serialization to a first CollectionPage The collection isn't actually paginable yet as it has no id nor a `next` field. This may come in another PR. * Use pluck(:uri) instead of map(&:uri) to improve performances * Fix fetching replies when they are in a CollectionPagepull/4/head
@ -1,5 +1,5 @@ | |||
# frozen_string_literal: true | |||
class ActivityPub::CollectionPresenter < ActiveModelSerializers::Model | |||
attributes :id, :type, :size, :items, :part_of, :first, :last, :next, :prev | |||
attributes :id, :type, :size, :items, :page, :part_of, :first, :last, :next, :prev | |||
end |
@ -0,0 +1,60 @@ | |||
# frozen_string_literal: true | |||
class ActivityPub::FetchRepliesService < BaseService | |||
include JsonLdHelper | |||
def call(parent_status, collection_or_uri, allow_synchronous_requests = true) | |||
@account = parent_status.account | |||
@allow_synchronous_requests = allow_synchronous_requests | |||
@items = collection_items(collection_or_uri) | |||
return if @items.nil? | |||
FetchReplyWorker.push_bulk(filtered_replies) | |||
@items | |||
end | |||
private | |||
def collection_items(collection_or_uri) | |||
collection = fetch_collection(collection_or_uri) | |||
return unless collection.is_a?(Hash) | |||
collection = fetch_collection(collection['first']) if collection['first'].present? | |||
return unless collection.is_a?(Hash) | |||
case collection['type'] | |||
when 'Collection', 'CollectionPage' | |||
collection['items'] | |||
when 'OrderedCollection', 'OrderedCollectionPage' | |||
collection['orderedItems'] | |||
end | |||
end | |||
def fetch_collection(collection_or_uri) | |||
return collection_or_uri if collection_or_uri.is_a?(Hash) | |||
return unless @allow_synchronous_requests | |||
return if invalid_origin?(collection_or_uri) | |||
collection = fetch_resource_without_id_validation(collection_or_uri) | |||
raise Mastodon::UnexpectedResponseError if collection.nil? | |||
collection | |||
end | |||
def filtered_replies | |||
# Only fetch replies to the same server as the original status to avoid | |||
# amplification attacks. | |||
# Also limit to 5 fetched replies to limit potential for DoS. | |||
@items.map { |item| value_or_id(item) }.reject { |uri| invalid_origin?(uri) }.take(5) | |||
end | |||
def invalid_origin?(url) | |||
return true if unsupported_uri_scheme?(url) | |||
needle = Addressable::URI.parse(url).host | |||
haystack = Addressable::URI.parse(@account.uri).host | |||
!haystack.casecmp(needle).zero? | |||
end | |||
end |
@ -0,0 +1,12 @@ | |||
# frozen_string_literal: true | |||
class ActivityPub::FetchRepliesWorker | |||
include Sidekiq::Worker | |||
include ExponentialBackoff | |||
sidekiq_options queue: 'pull', retry: 3 | |||
def perform(parent_status_id, replies_uri) | |||
ActivityPub::FetchRepliesService.new.call(Status.find(parent_status_id), replies_uri) | |||
end | |||
end |
@ -0,0 +1,11 @@ | |||
# frozen_string_literal: true | |||
module ExponentialBackoff | |||
extend ActiveSupport::Concern | |||
included do | |||
sidekiq_retry_in do |count| | |||
15 + 10 * (count**4) + rand(10 * (count**4)) | |||
end | |||
end | |||
end |
@ -0,0 +1,12 @@ | |||
# frozen_string_literal: true | |||
class FetchReplyWorker | |||
include Sidekiq::Worker | |||
include ExponentialBackoff | |||
sidekiq_options queue: 'pull', retry: 3 | |||
def perform(child_url) | |||
FetchRemoteStatusService.new.call(child_url) | |||
end | |||
end |
@ -0,0 +1,44 @@ | |||
# frozen_string_literal: true | |||
require 'rails_helper' | |||
describe ActivityPub::NoteSerializer do | |||
let!(:account) { Fabricate(:account) } | |||
let!(:other) { Fabricate(:account) } | |||
let!(:parent) { Fabricate(:status, account: account, visibility: :public) } | |||
let!(:reply1) { Fabricate(:status, account: account, thread: parent, visibility: :public) } | |||
let!(:reply2) { Fabricate(:status, account: account, thread: parent, visibility: :public) } | |||
let!(:reply3) { Fabricate(:status, account: other, thread: parent, visibility: :public) } | |||
let!(:reply4) { Fabricate(:status, account: account, thread: parent, visibility: :public) } | |||
let!(:reply5) { Fabricate(:status, account: account, thread: parent, visibility: :direct) } | |||
before(:each) do | |||
@serialization = ActiveModelSerializers::SerializableResource.new(parent, serializer: ActivityPub::NoteSerializer, adapter: ActivityPub::Adapter) | |||
end | |||
subject { JSON.parse(@serialization.to_json) } | |||
it 'has a Note type' do | |||
expect(subject['type']).to eql('Note') | |||
end | |||
it 'has a replies collection' do | |||
expect(subject['replies']['type']).to eql('Collection') | |||
end | |||
it 'has a replies collection with a first Page' do | |||
expect(subject['replies']['first']['type']).to eql('CollectionPage') | |||
end | |||
it 'includes public self-replies in its replies collection' do | |||
expect(subject['replies']['first']['items']).to include(reply1.uri, reply2.uri, reply4.uri) | |||
end | |||
it 'does not include replies from others in its replies collection' do | |||
expect(subject['replies']['first']['items']).to_not include(reply3.uri) | |||
end | |||
it 'does not include replies with direct visibility in its replies collection' do | |||
expect(subject['replies']['first']['items']).to_not include(reply5.uri) | |||
end | |||
end |
@ -0,0 +1,122 @@ | |||
require 'rails_helper' | |||
RSpec.describe ActivityPub::FetchRepliesService, type: :service do | |||
let(:actor) { Fabricate(:account, domain: 'example.com', uri: 'http://example.com/account') } | |||
let(:status) { Fabricate(:status, account: actor) } | |||
let(:collection_uri) { 'http://example.com/replies/1' } | |||
let(:items) do | |||
[ | |||
'http://example.com/self-reply-1', | |||
'http://example.com/self-reply-2', | |||
'http://example.com/self-reply-3', | |||
'http://other.com/other-reply-1', | |||
'http://other.com/other-reply-2', | |||
'http://other.com/other-reply-3', | |||
'http://example.com/self-reply-4', | |||
'http://example.com/self-reply-5', | |||
'http://example.com/self-reply-6', | |||
] | |||
end | |||
let(:payload) do | |||
{ | |||
'@context': 'https://www.w3.org/ns/activitystreams', | |||
type: 'Collection', | |||
id: collection_uri, | |||
items: items, | |||
}.with_indifferent_access | |||
end | |||
subject { described_class.new } | |||
describe '#call' do | |||
context 'when the payload is a Collection with inlined replies' do | |||
context 'when passing the collection itself' do | |||
it 'spawns workers for up to 5 replies on the same server' do | |||
allow(FetchReplyWorker).to receive(:push_bulk) | |||
subject.call(status, payload) | |||
expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5']) | |||
end | |||
end | |||
context 'when passing the URL to the collection' do | |||
before do | |||
stub_request(:get, collection_uri).to_return(status: 200, body: Oj.dump(payload)) | |||
end | |||
it 'spawns workers for up to 5 replies on the same server' do | |||
allow(FetchReplyWorker).to receive(:push_bulk) | |||
subject.call(status, collection_uri) | |||
expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5']) | |||
end | |||
end | |||
end | |||
context 'when the payload is an OrderedCollection with inlined replies' do | |||
let(:payload) do | |||
{ | |||
'@context': 'https://www.w3.org/ns/activitystreams', | |||
type: 'OrderedCollection', | |||
id: collection_uri, | |||
orderedItems: items, | |||
}.with_indifferent_access | |||
end | |||
context 'when passing the collection itself' do | |||
it 'spawns workers for up to 5 replies on the same server' do | |||
allow(FetchReplyWorker).to receive(:push_bulk) | |||
subject.call(status, payload) | |||
expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5']) | |||
end | |||
end | |||
context 'when passing the URL to the collection' do | |||
before do | |||
stub_request(:get, collection_uri).to_return(status: 200, body: Oj.dump(payload)) | |||
end | |||
it 'spawns workers for up to 5 replies on the same server' do | |||
allow(FetchReplyWorker).to receive(:push_bulk) | |||
subject.call(status, collection_uri) | |||
expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5']) | |||
end | |||
end | |||
end | |||
context 'when the payload is a paginated Collection with inlined replies' do | |||
let(:payload) do | |||
{ | |||
'@context': 'https://www.w3.org/ns/activitystreams', | |||
type: 'Collection', | |||
id: collection_uri, | |||
first: { | |||
type: 'CollectionPage', | |||
partOf: collection_uri, | |||
items: items, | |||
} | |||
}.with_indifferent_access | |||
end | |||
context 'when passing the collection itself' do | |||
it 'spawns workers for up to 5 replies on the same server' do | |||
allow(FetchReplyWorker).to receive(:push_bulk) | |||
subject.call(status, payload) | |||
expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5']) | |||
end | |||
end | |||
context 'when passing the URL to the collection' do | |||
before do | |||
stub_request(:get, collection_uri).to_return(status: 200, body: Oj.dump(payload)) | |||
end | |||
it 'spawns workers for up to 5 replies on the same server' do | |||
allow(FetchReplyWorker).to receive(:push_bulk) | |||
subject.call(status, collection_uri) | |||
expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5']) | |||
end | |||
end | |||
end | |||
end | |||
end |
@ -0,0 +1,40 @@ | |||
# frozen_string_literal: true | |||
require 'rails_helper' | |||
describe ActivityPub::FetchRepliesWorker do | |||
subject { described_class.new } | |||
let(:account) { Fabricate(:account, uri: 'https://example.com/user/1') } | |||
let(:status) { Fabricate(:status, account: account) } | |||
let(:payload) do | |||
{ | |||
'@context': 'https://www.w3.org/ns/activitystreams', | |||
id: 'https://example.com/statuses_replies/1', | |||
type: 'Collection', | |||
items: [], | |||
} | |||
end | |||
let(:json) { Oj.dump(payload) } | |||
describe 'perform' do | |||
it 'performs a request if the collection URI is from the same host' do | |||
stub_request(:get, 'https://example.com/statuses_replies/1').to_return(status: 200, body: json) | |||
subject.perform(status.id, 'https://example.com/statuses_replies/1') | |||
expect(a_request(:get, 'https://example.com/statuses_replies/1')).to have_been_made.once | |||
end | |||
it 'does not perform a request if the collection URI is from a different host' do | |||
stub_request(:get, 'https://other.com/statuses_replies/1').to_return(status: 200) | |||
subject.perform(status.id, 'https://other.com/statuses_replies/1') | |||
expect(a_request(:get, 'https://other.com/statuses_replies/1')).to_not have_been_made | |||
end | |||
it 'raises when request fails' do | |||
stub_request(:get, 'https://example.com/statuses_replies/1').to_return(status: 500) | |||
expect { subject.perform(status.id, 'https://example.com/statuses_replies/1') }.to raise_error Mastodon::UnexpectedResponseError | |||
end | |||
end | |||
end |