You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

152 lines
5.5 KiB

  1. # frozen_string_literal: true
  2. require 'concurrent'
  3. require_relative '../../config/boot'
  4. require_relative '../../config/environment'
  5. require_relative 'cli_helper'
  6. module Mastodon
  7. class DomainsCLI < Thor
  # Hook consulted by Thor; always answers true for this CLI.
  #
  # @return [Boolean] always +true+
  def self.exit_on_failure?
    true
  end
  # Thor command: removes the accounts, domain blocks and custom emojis
  # recorded for a single remote domain.
  #
  # With --dry-run nothing is deleted; matching records are only counted and
  # every summary line is suffixed with " (DRY RUN)".
  option :dry_run, type: :boolean
  desc 'purge DOMAIN', 'Remove accounts from a DOMAIN without a trace'
  long_desc <<~LONG_DESC
    Remove all accounts from a given DOMAIN without leaving behind any
    records. Unlike a suspension, if the DOMAIN still exists in the wild,
    it means the accounts could return if they are resolved again.
  LONG_DESC
  # @param domain [String] domain whose records should be purged
  def purge(domain)
    removed = 0
    dry_run = options[:dry_run] ? ' (DRY RUN)' : ''

    Account.where(domain: domain).find_each do |account|
      SuspendAccountService.new.call(account, destroy: true) unless options[:dry_run]
      removed += 1
      # One dot per account, without a trailing newline, as progress output.
      say('.', :green, false)
    end

    DomainBlock.where(domain: domain).destroy_all unless options[:dry_run]

    # Terminate the line of progress dots before printing the summary.
    say
    say("Removed #{removed} accounts#{dry_run}", :green)

    custom_emojis = CustomEmoji.where(domain: domain)
    custom_emojis_count = custom_emojis.count
    custom_emojis.destroy_all unless options[:dry_run]

    # Carry the same dry-run marker as the accounts line so a dry run never
    # claims that emojis were actually removed.
    say("Removed #{custom_emojis_count} custom emojis#{dry_run}", :green)
  end
  # Thor command: crawls peers over the Mastodon REST API and prints the
  # collected statistics in the format selected by --format.
  option :concurrency, type: :numeric, default: 50, aliases: [:c]
  option :silent, type: :boolean, default: false, aliases: [:s]
  option :format, type: :string, default: 'summary', aliases: [:f]
  desc 'crawl [START]', 'Crawl all known peers, optionally beginning at START'
  long_desc <<~LONG_DESC
    Crawl the fediverse by using the Mastodon REST API endpoints that expose
    all known peers, and collect statistics from those peers, as long as those
    peers support those API endpoints. When no START is given, the command uses
    this server's own database of known peers to seed the crawl.
    The --concurrency (-c) option controls the number of threads performing HTTP
    requests at the same time. More threads means the crawl may complete faster.
    The --silent (-s) option controls progress output.
    The --format (-f) option controls how the data is displayed at the end. By
    default (`summary`), a summary of the statistics is returned. The other options
    are `domains`, which returns a newline-delimited list of all discovered peers,
    and `json`, which dumps all the aggregated data raw.
  LONG_DESC
  # @param start [String, nil] domain to seed the crawl with; when nil the
  #   seed is Account.remote.domains
  def crawl(start = nil)
    stats     = Concurrent::Hash.new
    processed = Concurrent::AtomicFixnum.new(0)
    failed    = Concurrent::AtomicFixnum.new(0)
    start_at  = Time.now.to_f
    seed      = start ? [start] : Account.remote.domains

    # NOTE(review): max_queue: 0 is presumably "unbounded queue" in
    # concurrent-ruby -- confirm against the library documentation.
    pool = Concurrent::ThreadPoolExecutor.new(min_threads: 0, max_threads: options[:concurrency], idletime: 10, auto_terminate: true, max_queue: 0)

    work_unit = ->(domain) do
      # Skip domains that already have an entry (visited or in progress).
      # NOTE(review): this check-then-set is not atomic across threads.
      next if stats.key?(domain)

      # Reserve the key before any request is made; entries that stay nil
      # are dropped later by the summary and JSON formatters.
      stats[domain] = nil
      processed.increment

      begin
        Request.new(:get, "https://#{domain}/api/v1/instance").perform do |res|
          next unless res.code == 200
          stats[domain] = Oj.load(res.to_s)
        end

        Request.new(:get, "https://#{domain}/api/v1/instance/peers").perform do |res|
          next unless res.code == 200

          # Every peer not yet seen becomes a new unit of work on the pool.
          Oj.load(res.to_s).reject { |peer| stats.key?(peer) }.each do |peer|
            pool.post(peer, &work_unit)
          end
        end

        Request.new(:get, "https://#{domain}/api/v1/instance/activity").perform do |res|
          next unless res.code == 200

          # If the instance request above did not return 200, stats[domain]
          # is still nil and this raises; the rescue below then counts the
          # domain as failed.
          stats[domain]['activity'] = Oj.load(res.to_s)
        end

        say('.', :green, false) unless options[:silent]
      rescue StandardError
        failed.increment
        say('.', :red, false) unless options[:silent]
      end
    end

    seed.each do |domain|
      pool.post(domain, &work_unit)
    end

    # Wait once, then keep polling every 20 seconds until the queue is empty.
    sleep 20
    sleep 20 until pool.queue_length.zero?

    pool.shutdown
    pool.wait_for_termination(20)
  ensure
    # pool is still nil when seeding or pool construction raised; guarding
    # the call keeps that original exception from being replaced by a
    # NoMethodError raised inside this ensure clause.
    pool&.shutdown

    say unless options[:silent]

    case options[:format]
    when 'summary'
      stats_to_summary(stats, processed, failed, start_at)
    when 'domains'
      stats_to_domains(stats)
    when 'json'
      stats_to_json(stats)
    end
  end
  102. private
  # Prints the aggregated crawl summary through +say+.
  #
  # Entries whose value is nil are removed from +stats+ in place before
  # anything is counted, so the hash passed by the caller is modified.
  #
  # @param stats [Hash] crawl results keyed by domain
  # @param processed [#value] counter of visited domains
  # @param failed [#value] counter of failed domains
  # @param start_at [Float] start time in seconds, compared against Time.now.to_f
  def stats_to_summary(stats, processed, failed, start_at)
    stats.compact!

    entries = stats.values

    # Second element of an entry's 'activity' array, or nil when the entry
    # does not have the expected shape (Hash -> Array of more than two
    # elements -> Hash at index 1).
    previous_week = lambda do |entry|
      next unless entry.is_a?(Hash)

      weeks = entry['activity']
      next unless weeks.is_a?(Array) && weeks.size > 2

      weeks[1] if weeks[1].is_a?(Hash)
    end

    # Sums one field of that second activity element over all entries.
    weekly_total = lambda do |field|
      entries.sum do |entry|
        week = previous_week.call(entry)
        week ? week[field].to_i : 0
      end
    end

    total_domains = stats.size
    total_users   = entries.sum do |entry|
      entry.is_a?(Hash) && entry['stats'].is_a?(Hash) ? entry['stats']['user_count'].to_i : 0
    end
    total_active  = weekly_total.call('logins')
    total_joined  = weekly_total.call('registrations')

    say("Visited #{processed.value} domains, #{failed.value} failed (#{(Time.now.to_f - start_at).round}s elapsed)", :green)
    say("Total servers: #{total_domains}", :green)
    say("Total registered: #{total_users}", :green)
    say("Total active last week: #{total_active}", :green)
    say("Total joined last week: #{total_joined}", :green)
  end
  # Prints every key of +stats+ on its own line through +say+.
  #
  # @param stats [Hash] crawl results keyed by domain
  def stats_to_domains(stats)
    domain_list = stats.keys.join("\n")
    say(domain_list)
  end
  # Prints +stats+ serialized with Oj.dump through +say+.
  #
  # Entries whose value is nil are removed from +stats+ in place first, so
  # the hash passed by the caller is modified.
  #
  # @param stats [Hash] crawl results keyed by domain
  def stats_to_json(stats)
    stats.compact!
    payload = Oj.dump(stats)
    say(payload)
  end
  122. end
  123. end