You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

175 lines
6.6 KiB

  1. # frozen_string_literal: true
  2. require 'concurrent'
  3. require_relative '../../config/boot'
  4. require_relative '../../config/environment'
  5. require_relative 'cli_helper'
  module Mastodon
    # Thor command set for working with remote domains: purging every trace of
    # a domain from the local database, and crawling the network of known
    # peers to collect statistics.
    class DomainsCLI < Thor
      include CLIHelper

      # Tells Thor to exit with a failure status when a command fails, so
      # shell callers can detect the error.
      def self.exit_on_failure?
        true
      end

      option :concurrency, type: :numeric, default: 5, aliases: [:c]
      option :verbose, type: :boolean, aliases: [:v]
      option :dry_run, type: :boolean
      option :limited_federation_mode, type: :boolean
      desc 'purge [DOMAIN...]', 'Remove accounts from a DOMAIN without a trace'
      long_desc <<-LONG_DESC
        Remove all accounts from a given DOMAIN without leaving behind any
        records. Unlike a suspension, if the DOMAIN still exists in the wild,
        it means the accounts could return if they are resolved again.

        When the --limited-federation-mode option is given, instead of purging accounts
        from a single domain, all accounts from domains that have not been explicitly allowed
        are removed from the database.
      LONG_DESC
      # Deletes remote accounts, then the domain blocks and custom emojis
      # recorded for the given domains.
      #
      # With --dry-run nothing is deleted; the counts that would have been
      # affected are still reported, marked with "(DRY RUN)".
      #
      # Note that the domain-block and custom-emoji cleanup is keyed on the
      # +domains+ arguments only, so in limited federation mode it affects
      # just the domains that were listed explicitly (if any).
      #
      # @param domains [Array<String>] domains to purge; may be empty only
      #   when --limited-federation-mode is given
      def purge(*domains)
        dry_run = options[:dry_run] ? ' (DRY RUN)' : ''

        scope =
          if options[:limited_federation_mode]
            Account.remote.where.not(domain: DomainAllow.pluck(:domain))
          elsif domains.empty?
            say('No domain(s) given', :red)
            exit(1)
          else
            Account.remote.where(domain: domains)
          end

        # The block still runs for every account in a dry run so that the
        # processed count reflects what a real run would touch.
        processed, = parallelize_with_progress(scope) do |account|
          DeleteAccountService.new.call(account, reserve_username: false, skip_side_effects: true) unless options[:dry_run]
        end

        DomainBlock.where(domain: domains).destroy_all unless options[:dry_run]
        say("Removed #{processed} accounts#{dry_run}", :green)

        custom_emojis = CustomEmoji.where(domain: domains)
        custom_emojis_count = custom_emojis.count

        unless options[:dry_run]
          custom_emojis.destroy_all
          Instance.refresh
        end

        # Carries the same dry-run marker as the accounts line, so a dry run
        # never claims to have removed emojis.
        say("Removed #{custom_emojis_count} custom emojis#{dry_run}", :green)
      end

      option :concurrency, type: :numeric, default: 50, aliases: [:c]
      option :format, type: :string, default: 'summary', enum: %w[summary domains json], aliases: [:f]
      option :exclude_suspended, type: :boolean, default: false, aliases: [:x]
      desc 'crawl [START]', 'Crawl all known peers, optionally beginning at START'
      long_desc <<-LONG_DESC
        Crawl the fediverse by using the Mastodon REST API endpoints that expose
        all known peers, and collect statistics from those peers, as long as those
        peers support those API endpoints. When no START is given, the command uses
        this server's own database of known peers to seed the crawl.

        The --concurrency (-c) option controls the number of threads performing HTTP
        requests at the same time. More threads means the crawl may complete faster.

        The --format (-f) option controls how the data is displayed at the end. By
        default (`summary`), a summary of the statistics is returned. The other options
        are `domains`, which returns a newline-delimited list of all discovered peers,
        and `json`, which dumps all the aggregated data raw.

        The --exclude-suspended (-x) option means that domains that are suspended
        instance-wide do not appear in the output and are not included in summaries.
        This also excludes subdomains of any of those domains.
      LONG_DESC
      # Crawls peers breadth-first through a thread pool. Each visited domain
      # is asked for its instance info, its peer list (which feeds more work
      # into the pool) and its activity, and the results are reported in the
      # format selected by --format.
      #
      # Reporting happens in the +ensure+ section, so whatever was gathered is
      # still printed if the crawl is cut short by an exception.
      #
      # @param start [String, nil] domain to start from; when nil, every
      #   domain returned by Instance.pluck(:domain) seeds the crawl
      def crawl(start = nil)
        stats = Concurrent::Hash.new
        processed = Concurrent::AtomicFixnum.new(0)
        failed = Concurrent::AtomicFixnum.new(0)
        # Monotonic clock: the elapsed time must not jump if the wall clock
        # is adjusted while the crawl is running.
        start_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        seed = start ? [start] : Instance.pluck(:domain)

        # severity 1 is presumably the "suspend" level of DomainBlock —
        # confirm against the model's enum.
        blocked_domains = suspended_domains_pattern(DomainBlock.where(severity: 1).pluck(:domain)) if options[:exclude_suspended]

        progress = create_progress_bar
        pool = Concurrent::ThreadPoolExecutor.new(min_threads: 0, max_threads: options[:concurrency], idletime: 10, auto_terminate: true, max_queue: 0)

        work_unit = lambda do |domain|
          next if stats.key?(domain)
          next if blocked_domains && domain.match?(blocked_domains)

          # Claim the domain before doing any I/O so other workers skip it.
          stats[domain] = nil

          begin
            Request.new(:get, "https://#{domain}/api/v1/instance").perform do |res|
              next unless res.code == 200

              stats[domain] = Oj.load(res.to_s)
            end

            Request.new(:get, "https://#{domain}/api/v1/instance/peers").perform do |res|
              next unless res.code == 200

              # The payload comes from a remote server: only a list of
              # strings is usable as further domains to visit.
              peers = Oj.load(res.to_s)
              next unless peers.is_a?(Array)

              peers.each do |peer|
                next unless peer.is_a?(String)
                next if stats.key?(peer)

                pool.post(peer, &work_unit)
              end
            end

            Request.new(:get, "https://#{domain}/api/v1/instance/activity").perform do |res|
              # Activity can only be attached when the instance request above
              # produced a hash to attach it to.
              next unless res.code == 200 && stats[domain].is_a?(Hash)

              stats[domain]['activity'] = Oj.load(res.to_s)
            end
          rescue StandardError
            failed.increment
          ensure
            processed.increment
            progress.increment unless progress.finished?
          end
        end

        seed.each { |domain| pool.post(domain, &work_unit) }

        # Give workers time to enqueue discovered peers, then poll until the
        # queue has drained before shutting the pool down.
        sleep 20
        sleep 20 until pool.queue_length.zero?

        pool.shutdown
        pool.wait_for_termination(20)
      ensure
        # Both locals are still nil if an exception was raised before they
        # were assigned; safe navigation keeps that original error visible.
        progress&.finish
        pool&.shutdown

        case options[:format]
        when 'summary'
          stats_to_summary(stats, processed, failed, start_at)
        when 'domains'
          stats_to_domains(stats)
        when 'json'
          stats_to_json(stats)
        end
      end

      private

      # Builds a regexp matching any of +domains+ exactly, or any subdomain
      # of them. Each domain is escaped, so a "." only matches a literal dot.
      #
      # @param domains [Array<String>]
      # @return [Regexp, nil] nil when +domains+ is empty, so that an empty
      #   list excludes nothing rather than everything
      def suspended_domains_pattern(domains)
        return if domains.empty?

        /(?:\A|\.)#{Regexp.union(domains)}\z/
      end

      # Returns the activity entry the summary reads its last-week figures
      # from (index 1 of the 'activity' list), or nil when the data does not
      # have the expected shape.
      #
      # NOTE(review): only index 1 is read, so requiring more than two
      # entries looks stricter than necessary; the threshold is kept as it
      # was so that totals do not change.
      def last_week_activity(val)
        return unless val.is_a?(Hash)

        activity = val['activity']
        return unless activity.is_a?(Array) && activity.size > 2

        week = activity[1]
        week if week.is_a?(Hash)
      end

      # Prints aggregate totals for the crawl. Removes nil entries (domains
      # that yielded no instance data) from +stats+ in place first.
      #
      # @param stats [Hash] domain => instance data (or nil)
      # @param processed [#value] counter of visited domains
      # @param failed [#value] counter of domains whose crawl raised
      # @param start_at [Float] Process::CLOCK_MONOTONIC reading taken when
      #   the crawl started
      def stats_to_summary(stats, processed, failed, start_at)
        stats.compact!

        elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_at).round
        total_domains = stats.size
        total_users = stats.each_value.sum { |val| val.is_a?(Hash) && val['stats'].is_a?(Hash) ? val['stats']['user_count'].to_i : 0 }
        last_weeks = stats.each_value.map { |val| last_week_activity(val) }.compact
        total_active = last_weeks.sum { |week| week['logins'].to_i }
        total_joined = last_weeks.sum { |week| week['registrations'].to_i }

        say("Visited #{processed.value} domains, #{failed.value} failed (#{elapsed}s elapsed)", :green)
        say("Total servers: #{total_domains}", :green)
        say("Total registered: #{total_users}", :green)
        say("Total active last week: #{total_active}", :green)
        say("Total joined last week: #{total_joined}", :green)
      end

      # Prints every key of +stats+, one per line. Entries with nil values
      # are not removed here, so domains without data are listed too.
      def stats_to_domains(stats)
        say(stats.keys.join("\n"))
      end

      # Prints +stats+ serialized with Oj, after removing nil entries from it
      # in place.
      def stats_to_json(stats)
        stats.compact!
        say(Oj.dump(stats))
      end
    end
  end