From 17c591ffba59bda512fe43a09c06c40324acc472 Mon Sep 17 00:00:00 2001 From: Eugen Date: Tue, 25 Apr 2017 02:47:31 +0200 Subject: [PATCH] Punycode URI normalization (#2370) * Fix #2119 - Whenever about to send a HTTP request, normalize the URI * Add test for IDN request in FetchLinkCardService * Perform IDN normalization on domains before they are stored in the DB --- app/controllers/api/push_controller.rb | 2 +- .../authorize_follow_controller.rb | 2 +- app/lib/tag_manager.rb | 8 +- app/models/account.rb | 31 +- app/models/domain_block.rb | 8 + app/models/media_attachment.rb | 2 +- app/services/fetch_link_card_service.rb | 2 +- app/services/fetch_remote_account_service.rb | 2 +- app/services/fetch_remote_status_service.rb | 2 +- app/services/follow_remote_account_service.rb | 2 +- app/services/process_feed_service.rb | 6 +- app/services/process_interaction_service.rb | 2 +- .../pubsubhubbub/subscribe_service.rb | 2 +- app/validators/url_validator.rb | 2 +- app/workers/pubsubhubbub/delivery_worker.rb | 2 +- spec/fixtures/requests/idn.txt | 483 ++++++++++++++++++ spec/services/fetch_link_card_service_spec.rb | 14 + 17 files changed, 546 insertions(+), 26 deletions(-) create mode 100644 spec/fixtures/requests/idn.txt create mode 100644 spec/services/fetch_link_card_service_spec.rb diff --git a/app/controllers/api/push_controller.rb b/app/controllers/api/push_controller.rb index f2ddfd969e..bc547d6530 100644 --- a/app/controllers/api/push_controller.rb +++ b/app/controllers/api/push_controller.rb @@ -26,7 +26,7 @@ class Api::PushController < ApiController def topic_to_account(topic_url) return if topic_url.blank? - uri = Addressable::URI.parse(topic_url) + uri = Addressable::URI.parse(topic_url).normalize params = Rails.application.routes.recognize_path(uri.path) domain = uri.host + (uri.port ? ":#{uri.port}" : '') diff --git a/app/controllers/authorize_follow_controller.rb b/app/controllers/authorize_follow_controller.rb index c98a5f45f5..9b28a9455a 100644 --- a/app/controllers/authorize_follow_controller.rb +++ b/app/controllers/authorize_follow_controller.rb @@ -6,7 +6,7 @@ class AuthorizeFollowController < ApplicationController before_action :authenticate_user! def new - uri = Addressable::URI.parse(acct_param) + uri = Addressable::URI.parse(acct_param).normalize if uri.path && %w(http https).include?(uri.scheme) set_account_from_url diff --git a/app/lib/tag_manager.rb b/app/lib/tag_manager.rb index f26c943d25..3bddfba7c2 100644 --- a/app/lib/tag_manager.rb +++ b/app/lib/tag_manager.rb @@ -64,6 +64,12 @@ class TagManager domain.nil? || domain.gsub(/[\/]/, '').casecmp(Rails.configuration.x.local_domain).zero? end + def normalize_domain(domain) + uri = Addressable::URI.new + uri.host = domain + uri.normalize.host + end + def same_acct?(canonical, needle) return true if canonical.casecmp(needle).zero? username, domain = needle.split('@') @@ -71,7 +77,7 @@ class TagManager end def local_url?(url) - uri = Addressable::URI.parse(url) + uri = Addressable::URI.parse(url).normalize domain = uri.host + (uri.port ? ":#{uri.port}" : '') TagManager.instance.local_domain?(domain) end diff --git a/app/models/account.rb b/app/models/account.rb index 084b17f43d..eebcf90b8f 100644 --- a/app/models/account.rb +++ b/app/models/account.rb @@ -182,22 +182,22 @@ class Account < ApplicationRecord end def avatar_remote_url=(url) - parsed_url = URI.parse(url) + parsed_url = Addressable::URI.parse(url).normalize return if !%w(http https).include?(parsed_url.scheme) || parsed_url.host.empty? || self[:avatar_remote_url] == url - self.avatar = parsed_url + self.avatar = URI.parse(parsed_url.to_s) self[:avatar_remote_url] = url rescue OpenURI::HTTPError => e Rails.logger.debug "Error fetching remote avatar: #{e}" end def header_remote_url=(url) - parsed_url = URI.parse(url) + parsed_url = Addressable::URI.parse(url).normalize return if !%w(http https).include?(parsed_url.scheme) || parsed_url.host.empty? || self[:header_remote_url] == url - self.header = parsed_url + self.header = URI.parse(parsed_url.to_s) self[:header_remote_url] = url rescue OpenURI::HTTPError => e Rails.logger.debug "Error fetching remote header: #{e}" @@ -331,16 +331,25 @@ class Account < ApplicationRecord end end - before_create do - if local? - keypair = OpenSSL::PKey::RSA.new(Rails.env.test? ? 1024 : 2048) - self.private_key = keypair.to_pem - self.public_key = keypair.public_key.to_pem - end - end + before_create :generate_keys + before_validation :normalize_domain private + def generate_keys + return unless local? + + keypair = OpenSSL::PKey::RSA.new(Rails.env.test? ? 1024 : 2048) + self.private_key = keypair.to_pem + self.public_key = keypair.public_key.to_pem + end + + def normalize_domain + return if local? + + self.domain = TagManager.instance.normalize_domain(domain) + end + def set_file_extensions unless avatar.blank? extension = Paperclip::Interpolations.content_type_extension(avatar, :original) diff --git a/app/models/domain_block.rb b/app/models/domain_block.rb index baf5c3973b..6c2ba70f76 100644 --- a/app/models/domain_block.rb +++ b/app/models/domain_block.rb @@ -13,4 +13,12 @@ class DomainBlock < ApplicationRecord def self.blocked?(domain) where(domain: domain, severity: :suspend).exists? end + + before_validation :normalize_domain + + private + + def normalize_domain + self.domain = TagManager.instance.normalize_domain(domain) + end end diff --git a/app/models/media_attachment.rb b/app/models/media_attachment.rb index 181e01674f..a43c76c77c 100644 --- a/app/models/media_attachment.rb +++ b/app/models/media_attachment.rb @@ -42,7 +42,7 @@ class MediaAttachment < ApplicationRecord end def file_remote_url=(url) - self.file = URI.parse(url) + self.file = URI.parse(Addressable::URI.parse(url).normalize.to_s) end def to_param diff --git a/app/services/fetch_link_card_service.rb b/app/services/fetch_link_card_service.rb index 31f9be194e..f74a0d34dc 100644 --- a/app/services/fetch_link_card_service.rb +++ b/app/services/fetch_link_card_service.rb @@ -19,7 +19,7 @@ class FetchLinkCardService < BaseService card.title = meta_property(page, 'og:title') || page.at_xpath('//title')&.content card.description = meta_property(page, 'og:description') || meta_property(page, 'description') - card.image = URI.parse(meta_property(page, 'og:image')) if meta_property(page, 'og:image') + card.image = URI.parse(Addressable::URI.parse(meta_property(page, 'og:image')).normalize.to_s) if meta_property(page, 'og:image') return if card.title.blank? diff --git a/app/services/fetch_remote_account_service.rb b/app/services/fetch_remote_account_service.rb index 50ffc47c66..7c2fb0ef12 100644 --- a/app/services/fetch_remote_account_service.rb +++ b/app/services/fetch_remote_account_service.rb @@ -21,7 +21,7 @@ class FetchRemoteAccountService < BaseService email = xml.at_xpath('//xmlns:author/xmlns:email').try(:content) if email.nil? - url_parts = Addressable::URI.parse(url) + url_parts = Addressable::URI.parse(url).normalize username = xml.at_xpath('//xmlns:author/xmlns:name').try(:content) domain = url_parts.host else diff --git a/app/services/fetch_remote_status_service.rb b/app/services/fetch_remote_status_service.rb index e2d1857231..c666961ad2 100644 --- a/app/services/fetch_remote_status_service.rb +++ b/app/services/fetch_remote_status_service.rb @@ -31,7 +31,7 @@ class FetchRemoteStatusService < BaseService end def extract_author(url, xml) - url_parts = Addressable::URI.parse(url) + url_parts = Addressable::URI.parse(url).normalize username = xml.at_xpath('//xmlns:author/xmlns:name').try(:content) domain = url_parts.host diff --git a/app/services/follow_remote_account_service.rb b/app/services/follow_remote_account_service.rb index 2d7414bc0b..fbf983093a 100644 --- a/app/services/follow_remote_account_service.rb +++ b/app/services/follow_remote_account_service.rb @@ -73,7 +73,7 @@ class FollowRemoteAccountService < BaseService end def get_feed(url) - response = http_client.get(Addressable::URI.parse(url)) + response = http_client.get(Addressable::URI.parse(url).normalize) [response.to_s, Nokogiri::XML(response)] end diff --git a/app/services/process_feed_service.rb b/app/services/process_feed_service.rb index 98d92f6300..d002b9130e 100644 --- a/app/services/process_feed_service.rb +++ b/app/services/process_feed_service.rb @@ -174,7 +174,7 @@ class ProcessFeedService < BaseService end def account_from_href(href) - url = Addressable::URI.parse(href) + url = Addressable::URI.parse(href).normalize if TagManager.instance.web_domain?(url.host) Account.find_local(url.path.gsub('/users/', '')) @@ -195,7 +195,7 @@ class ProcessFeedService < BaseService next unless link['href'] media = MediaAttachment.where(status: parent, remote_url: link['href']).first_or_initialize(account: parent.account, status: parent, remote_url: link['href']) - parsed_url = URI.parse(link['href']) + parsed_url = Addressable::URI.parse(link['href']).normalize next if !%w[http https].include?(parsed_url.scheme) || parsed_url.host.empty? @@ -271,7 +271,7 @@ class ProcessFeedService < BaseService def acct(xml = @xml) username = xml.at_xpath('./xmlns:author/xmlns:name', xmlns: TagManager::XMLNS).content url = xml.at_xpath('./xmlns:author/xmlns:uri', xmlns: TagManager::XMLNS).content - domain = Addressable::URI.parse(url).host + domain = Addressable::URI.parse(url).normalize.host "#{username}@#{domain}" end diff --git a/app/services/process_interaction_service.rb b/app/services/process_interaction_service.rb index 805ca5a273..410e805d34 100644 --- a/app/services/process_interaction_service.rb +++ b/app/services/process_interaction_service.rb @@ -14,7 +14,7 @@ class ProcessInteractionService < BaseService username = xml.at_xpath('/xmlns:entry/xmlns:author/xmlns:name', xmlns: TagManager::XMLNS).content url = xml.at_xpath('/xmlns:entry/xmlns:author/xmlns:uri', xmlns: TagManager::XMLNS).content - domain = Addressable::URI.parse(url).host + domain = Addressable::URI.parse(url).normalize.host account = Account.find_by(username: username, domain: domain) if account.nil? diff --git a/app/services/pubsubhubbub/subscribe_service.rb b/app/services/pubsubhubbub/subscribe_service.rb index bf36e3fa6f..3642b4eca1 100644 --- a/app/services/pubsubhubbub/subscribe_service.rb +++ b/app/services/pubsubhubbub/subscribe_service.rb @@ -4,7 +4,7 @@ class Pubsubhubbub::SubscribeService < BaseService def call(account, callback, secret, lease_seconds) return ['Invalid topic URL', 422] if account.nil? return ['Invalid callback URL', 422] unless !callback.blank? && callback =~ /\A#{URI.regexp(%w(http https))}\z/ - return ['Callback URL not allowed', 403] if DomainBlock.blocked?(Addressable::URI.parse(callback).host) + return ['Callback URL not allowed', 403] if DomainBlock.blocked?(Addressable::URI.parse(callback).normalize.host) subscription = Subscription.where(account: account, callback_url: callback).first_or_create!(account: account, callback_url: callback) Pubsubhubbub::ConfirmationWorker.perform_async(subscription.id, 'subscribe', secret, lease_seconds) diff --git a/app/validators/url_validator.rb b/app/validators/url_validator.rb index 4a5c4ef3ff..f39560d90a 100644 --- a/app/validators/url_validator.rb +++ b/app/validators/url_validator.rb @@ -8,7 +8,7 @@ class UrlValidator < ActiveModel::EachValidator private def compliant?(url) - parsed_url = Addressable::URI.parse(url) + parsed_url = Addressable::URI.parse(url).normalize !parsed_url.nil? && %w(http https).include?(parsed_url.scheme) && parsed_url.host end end diff --git a/app/workers/pubsubhubbub/delivery_worker.rb b/app/workers/pubsubhubbub/delivery_worker.rb index b440f7986e..f645b1e24d 100644 --- a/app/workers/pubsubhubbub/delivery_worker.rb +++ b/app/workers/pubsubhubbub/delivery_worker.rb @@ -13,7 +13,7 @@ class Pubsubhubbub::DeliveryWorker def perform(subscription_id, payload) subscription = Subscription.find(subscription_id) headers = {} - host = Addressable::URI.parse(subscription.callback_url).host + host = Addressable::URI.parse(subscription.callback_url).normalize.host return if DomainBlock.blocked?(host) diff --git a/spec/fixtures/requests/idn.txt b/spec/fixtures/requests/idn.txt new file mode 100644 index 0000000000..3c76c59c05 --- /dev/null +++ b/spec/fixtures/requests/idn.txt @@ -0,0 +1,483 @@ +HTTP/1.1 200 OK +Server: nginx +Date: Sun, 23 Apr 2017 19:37:13 GMT +Content-Type: text/html +Content-Length: 38111 +Last-Modified: Wed, 20 Jul 2016 02:50:52 GMT +Connection: keep-alive +Accept-Ranges: bytes + + + + + + + + + + + + + + + 中国域名网站 + + + +
+ +
+ +
+ + +
+
+
+

网址大全

+ +
+
+
+
+
+

中文域名简介

+

+ “中国域名”是中文域名的一种,特指以“中国”为后缀的中文域名,是我国域名体系和全球互联网域名体系的重要组成部分。“中国”是在全球互联网上代表中国的中文顶级域名,于2010年7月正式纳入全球互联网域名体系,全球互联网域名体系,全球网民可通过联网计算机在世界任何国家和地区实现无障碍访问。“中国”域名在使用上和 .CN,相似属于互联网上的基础服务,基于域名可以提供WWW.EMAIL FTP等应用服务。 +

+
+ + + + diff --git a/spec/services/fetch_link_card_service_spec.rb b/spec/services/fetch_link_card_service_spec.rb new file mode 100644 index 0000000000..5d72d40b62 --- /dev/null +++ b/spec/services/fetch_link_card_service_spec.rb @@ -0,0 +1,14 @@ +require 'rails_helper' + +RSpec.describe FetchLinkCardService do + before do + stub_request(:get, 'http://example.xn--fiqs8s/').to_return(request_fixture('idn.txt')) + end + + it 'works with IDN URLs' do + status = Fabricate(:status, text: 'Check out http://example.中国') + + FetchLinkCardService.new.call(status) + expect(a_request(:get, 'http://example.xn--fiqs8s/')).to have_been_made + end +end