You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

173 lines
6.5 KiB

  1. # frozen_string_literal: true
  2. require 'concurrent'
  3. require_relative '../../config/boot'
  4. require_relative '../../config/environment'
  5. require_relative 'cli_helper'
  6. module Mastodon
  7. class DomainsCLI < Thor
  8. include CLIHelper
  # Thor hook: answering true asks Thor to exit with a failure status
  # when a command errors out.
  def self.exit_on_failure?
    true
  end
  option :concurrency, type: :numeric, default: 5, aliases: [:c]
  option :verbose, type: :boolean, aliases: [:v]
  option :dry_run, type: :boolean
  option :whitelist_mode, type: :boolean
  desc 'purge [DOMAIN...]', 'Remove accounts from a DOMAIN without a trace'
  long_desc <<~LONG_DESC
    Remove all accounts from a given DOMAIN without leaving behind any
    records. Unlike a suspension, if the DOMAIN still exists in the wild,
    it means the accounts could return if they are resolved again.
    When the --whitelist-mode option is given, instead of purging accounts
    from a single domain, all accounts from domains that are not whitelisted
    are removed from the database.
  LONG_DESC
  # Removes remote accounts, then the matching domain blocks and custom
  # emojis, and reports how many records were affected.
  #
  # With --whitelist-mode the account scope is every remote account whose
  # domain is not listed in DomainAllow; otherwise it is the remote accounts
  # of the given +domains+. With --dry-run nothing is deleted and both
  # summary lines are suffixed with " (DRY RUN)".
  #
  # Exits with status 1 when neither --whitelist-mode nor a domain is given.
  #
  # @param domains [Array<String>] domain names to purge
  def purge(*domains)
    dry_run = options[:dry_run] ? ' (DRY RUN)' : ''

    if !options[:whitelist_mode] && domains.empty?
      say('No domain(s) given', :red)
      exit(1)
    end

    scope = if options[:whitelist_mode]
              Account.remote.where.not(domain: DomainAllow.pluck(:domain))
            else
              Account.remote.where(domain: domains)
            end

    processed, = parallelize_with_progress(scope) do |account|
      SuspendAccountService.new.call(account, reserve_username: false, skip_side_effects: true) unless options[:dry_run]
    end

    # NOTE(review): the two clean-ups below are keyed on the +domains+
    # arguments only. In whitelist mode that list is normally empty, so no
    # domain blocks or custom emojis are touched -- confirm this is intended.
    DomainBlock.where(domain: domains).destroy_all unless options[:dry_run]
    say("Removed #{processed} accounts#{dry_run}", :green)

    custom_emojis = CustomEmoji.where(domain: domains)
    custom_emojis_count = custom_emojis.count
    custom_emojis.destroy_all unless options[:dry_run]
    # Carry the dry-run marker here too, so a dry run never claims that
    # emojis were actually removed.
    say("Removed #{custom_emojis_count} custom emojis#{dry_run}", :green)
  end
  option :concurrency, type: :numeric, default: 50, aliases: [:c]
  option :format, type: :string, default: 'summary', aliases: [:f]
  option :exclude_suspended, type: :boolean, default: false, aliases: [:x]
  desc 'crawl [START]', 'Crawl all known peers, optionally beginning at START'
  long_desc <<~LONG_DESC
    Crawl the fediverse by using the Mastodon REST API endpoints that expose
    all known peers, and collect statistics from those peers, as long as those
    peers support those API endpoints. When no START is given, the command uses
    this server's own database of known peers to seed the crawl.
    The --concurrency (-c) option controls the number of threads performing HTTP
    requests at the same time. More threads means the crawl may complete faster.
    The --format (-f) option controls how the data is displayed at the end. By
    default (`summary`), a summary of the statistics is returned. The other options
    are `domains`, which returns a newline-delimited list of all discovered peers,
    and `json`, which dumps all the aggregated data raw.
    The --exclude-suspended (-x) option means that domains that are suspended
    instance-wide do not appear in the output and are not included in summaries.
    This also excludes subdomains of any of those domains.
  LONG_DESC
  # Crawls peers starting from +start+ (or from the domains of known remote
  # accounts), fetching /api/v1/instance, /api/v1/instance/peers and
  # /api/v1/instance/activity from each domain on a thread pool, then prints
  # the collected data in the format selected by --format.
  #
  # The output is produced from the +ensure+ clause, so whatever was gathered
  # is still printed when the crawl is cut short by an exception.
  #
  # @param start [String, nil] domain to seed the crawl with
  def crawl(start = nil)
    stats = Concurrent::Hash.new
    processed = Concurrent::AtomicFixnum.new(0)
    failed = Concurrent::AtomicFixnum.new(0)
    start_at = Time.now.to_f
    seed = start ? [start] : Account.remote.domains

    # Pattern matching a blocked domain or any of its subdomains. It stays
    # nil when exclusion is off or no such block exists, so an empty list can
    # never turn into a pattern that matches every domain. Each domain is
    # escaped and the alternation is grouped, so the anchors apply to every
    # alternative.
    # NOTE(review): severity 1 is presumably the "suspend" level, going by
    # the --exclude-suspended description -- confirm against DomainBlock.
    blocked_domains = nil
    if options[:exclude_suspended]
      suspended = DomainBlock.where(severity: 1).pluck(:domain).compact
      blocked_domains = /(?:\A|\.)#{Regexp.union(suspended)}\z/ unless suspended.empty?
    end

    progress = create_progress_bar
    pool = Concurrent::ThreadPoolExecutor.new(min_threads: 0, max_threads: options[:concurrency], idletime: 10, auto_terminate: true, max_queue: 0)

    work_unit = ->(domain) do
      next if stats.key?(domain)
      next if blocked_domains&.match?(domain.to_s)

      # Mark the domain as seen before any request is made.
      stats[domain] = nil

      begin
        Request.new(:get, "https://#{domain}/api/v1/instance").perform do |res|
          next unless res.code == 200
          stats[domain] = Oj.load(res.to_s)
        end

        Request.new(:get, "https://#{domain}/api/v1/instance/peers").perform do |res|
          next unless res.code == 200

          # Queue every peer that has not been seen yet.
          Oj.load(res.to_s).reject { |peer| stats.key?(peer) }.each do |peer|
            pool.post(peer, &work_unit)
          end
        end

        Request.new(:get, "https://#{domain}/api/v1/instance/activity").perform do |res|
          next unless res.code == 200

          # If the instance request above stored nothing, stats[domain] is
          # still nil here; the resulting error is rescued below and the
          # domain is counted as failed.
          stats[domain]['activity'] = Oj.load(res.to_s)
        end
      rescue StandardError
        failed.increment
      ensure
        processed.increment
        progress.increment unless progress.finished?
      end
    end

    seed.each do |domain|
      pool.post(domain, &work_unit)
    end

    # Give the workers time to start, then poll until the queue is empty.
    sleep 20
    sleep 20 until pool.queue_length.zero?

    pool.shutdown
    pool.wait_for_termination(20)
  ensure
    # Both are still nil if setup raised before they were assigned; guard so
    # the original exception is not replaced by a NoMethodError.
    progress&.finish
    pool&.shutdown

    case options[:format]
    when 'summary'
      stats_to_summary(stats, processed, failed, start_at)
    when 'domains'
      stats_to_domains(stats)
    when 'json'
      stats_to_json(stats)
    end
  end
  120. private
  # Prints the crawl summary: domains visited and failed, elapsed seconds,
  # number of servers with data, and totals of registered users, logins and
  # registrations taken from the second activity entry of each server.
  #
  # Entries whose value is nil are removed from +stats+ in place. Values come
  # from remote servers, so anything that is not shaped as expected counts
  # as 0 instead of raising.
  #
  # @param stats [Hash] per-domain data collected by the crawl
  # @param processed [#value] counter of visited domains
  # @param failed [#value] counter of failed domains
  # @param start_at [Float] crawl start, as seconds from Time#to_f
  def stats_to_summary(stats, processed, failed, start_at)
    stats.compact!

    total_domains = stats.size

    total_users = stats.each_value.sum do |info|
      user_stats = info['stats'] if info.is_a?(Hash)
      user_stats.is_a?(Hash) ? summary_count(user_stats['user_count']) : 0
    end

    total_active = stats.each_value.sum { |info| summary_count(summary_last_week(info)['logins']) }
    total_joined = stats.each_value.sum { |info| summary_count(summary_last_week(info)['registrations']) }

    say("Visited #{processed.value} domains, #{failed.value} failed (#{(Time.now.to_f - start_at).round}s elapsed)", :green)
    say("Total servers: #{total_domains}", :green)
    say("Total registered: #{total_users}", :green)
    say("Total active last week: #{total_active}", :green)
    say("Total joined last week: #{total_joined}", :green)
  end

  # Returns the second activity entry of +info+, or an empty hash when the
  # data is missing or malformed. The list must hold more than two entries,
  # the same condition the summary has always applied.
  def summary_last_week(info)
    activity = info['activity'] if info.is_a?(Hash)
    return {} unless activity.is_a?(Array) && activity.size > 2 && activity[1].is_a?(Hash)

    activity[1]
  end

  # Converts a remote-supplied value to an Integer, using 0 for anything
  # that cannot be converted (arrays, hashes, booleans, infinite floats).
  def summary_count(value)
    value.respond_to?(:to_i) ? value.to_i : 0
  rescue FloatDomainError
    0
  end
  # Prints the keys of +stats+ (the crawled domain names), one per line.
  def stats_to_domains(stats)
    domain_names = stats.each_key.to_a
    say(domain_names.join("\n"))
  end
  # Removes entries with a nil value from +stats+ in place, then prints the
  # remaining data as serialized by Oj.
  def stats_to_json(stats)
    stats.delete_if { |_domain, info| info.nil? }
    payload = Oj.dump(stats)
    say(payload)
  end
  140. end
  141. end