You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

327 lines
11 KiB

  1. # frozen_string_literal: true
  2. require_relative '../../config/boot'
  3. require_relative '../../config/environment'
  4. require_relative 'cli_helper'
  5. module Mastodon
  6. class MediaCLI < Thor
  7. include ActionView::Helpers::NumberHelper
  8. include CLIHelper
  # Hook read by Thor (the superclass of this CLI) when a command fails.
  #
  # @return [Boolean] always true
  def self.exit_on_failure?
    true
  end
  option :days, type: :numeric, default: 7, aliases: [:d]
  option :concurrency, type: :numeric, default: 5, aliases: [:c]
  option :verbose, type: :boolean, default: false, aliases: [:v]
  option :dry_run, type: :boolean, default: false
  desc 'remove', 'Remove remote media files'
  long_desc <<~DESC
    Removes locally cached copies of media attachments from other servers.
    The --days option specifies how old media attachments have to be before
    they are removed. It defaults to 7 days.
  DESC
  # Destroys the stored file of every media attachment in the `cached` scope
  # that has a non-empty remote_url and was created more than --days ago,
  # then prints how many attachments were handled and their approximate size.
  #
  # With --dry-run nothing is destroyed or saved; sizes are still collected
  # so the summary shows what would have been reclaimed.
  #
  # @return [void]
  def remove
    time_ago = options[:days].days.ago
    # Leading space so the suffix can be appended directly to the summary,
    # matching the other commands in this class (no trailing space otherwise).
    dry_run  = options[:dry_run] ? ' (DRY RUN)' : ''

    scope = MediaAttachment.cached.where.not(remote_url: '').where('created_at < ?', time_ago)

    # The block's value (the file size) is what the helper reports back as
    # `aggregate` — presumably summed; confirm against CLIHelper.
    processed, aggregate = parallelize_with_progress(scope) do |media_attachment|
      next if media_attachment.file.blank?

      # Read the size before the file is destroyed
      size = media_attachment.file_file_size

      unless options[:dry_run]
        media_attachment.file.destroy
        media_attachment.save
      end

      size
    end

    say("Removed #{processed} media attachments (approx. #{number_to_human_size(aggregate)})#{dry_run}", :green, true)
  end
  option :start_after
  option :prefix
  option :dry_run, type: :boolean, default: false
  desc 'remove-orphans', 'Scan storage and check for files that do not belong to existing media attachments'
  long_desc <<~LONG_DESC
    Scans file storage for files that do not belong to existing media attachments. Because this operation
    requires iterating over every single file individually, it will be slow.
    Please mind that some storage providers charge for the necessary API requests to list objects.
  LONG_DESC
  # Walks the configured Paperclip storage and deletes files that no existing
  # record's attachment claims as one of its variants.
  #
  # A stored path is expected to have exactly 7 segments once any 'cache'
  # segment is dropped: model / attachment / three id groups / style / file.
  # Anything else is reported as unrecognized and left alone.
  #
  # Options read: --prefix (restrict the scan), --start-after (S3 only, resume
  # listing after this key), --dry-run (report without deleting).
  #
  # @return [void]
  def remove_orphans
    progress        = create_progress_bar(nil)
    reclaimed_bytes = 0
    removed         = 0
    dry_run         = options[:dry_run] ? ' (DRY RUN)' : ''
    prefix          = options[:prefix]

    case Paperclip::Attachment.default_options[:storage]
    when :s3
      paperclip_instance = MediaAttachment.new.file
      s3_interface       = paperclip_instance.s3_interface
      bucket             = s3_interface.bucket(Paperclip::Attachment.default_options[:s3_credentials][:bucket])
      last_key           = options[:start_after]

      loop do
        # Fetch the listing in pages of up to 1000 objects; on a listing error,
        # tell the operator how to resume and stop scanning.
        objects = begin
          bucket.objects(start_after: last_key, prefix: prefix).limit(1000).to_a
        rescue => e
          progress.log(pastel.red("Error fetching list of files: #{e}"))
          progress.log("If you want to continue from this point, add --start-after=#{last_key} to your command") if last_key
          break
        end

        break if objects.empty?

        last_key   = objects.last.key
        # One query per model for the whole page instead of one per object
        record_map = preload_records_from_mixed_objects(objects)

        objects.each do |object|
          path_segments = object.key.split('/')
          path_segments.delete('cache')

          if path_segments.size != 7
            progress.log(pastel.yellow("Unrecognized file found: #{object.key}"))
            next
          end

          model_name      = path_segments.first.classify
          attachment_name = path_segments[1].singularize
          # Only the id groups: exclude both the style and the file name
          record_id       = path_segments[2...-2].join.to_i
          file_name       = path_segments.last
          # NOTE(review): unlike the filesystem branch below, models outside
          # PRELOAD_MODEL_WHITELIST are not skipped here; they have no preloaded
          # record and are therefore treated as orphans. Confirm this is intended.
          record          = record_map.dig(model_name, record_id)
          attachment      = record&.public_send(attachment_name)

          progress.increment

          # Keep the file when the record exists and recognizes this file name
          next unless attachment.blank? || !attachment.variant?(file_name)

          begin
            object.delete unless options[:dry_run]

            reclaimed_bytes += object.size
            removed         += 1

            progress.log("Found and removed orphan: #{object.key}")
          rescue => e
            progress.log(pastel.red("Error processing #{object.key}: #{e}"))
          end
        end
      end
    when :fog
      say('The fog storage driver is not supported for this operation at this time', :red)
      exit(1)
    when :filesystem
      require 'find'

      root_path = ENV.fetch('PAPERCLIP_ROOT_PATH', File.join(':rails_root', 'public', 'system')).gsub(':rails_root', Rails.root.to_s)

      Find.find(File.join(*[root_path, prefix].compact)) do |path|
        next if File.directory?(path)

        # Strip only the first occurrence of the storage root to obtain the relative key
        key = path.sub("#{root_path}#{File::SEPARATOR}", '')

        path_segments = key.split(File::SEPARATOR)
        path_segments.delete('cache')

        if path_segments.size != 7
          progress.log(pastel.yellow("Unrecognized file found: #{key}"))
          next
        end

        model_name      = path_segments.first.classify
        # Only the id groups: exclude both the style and the file name
        record_id       = path_segments[2...-2].join.to_i
        attachment_name = path_segments[1].singularize
        file_name       = path_segments.last

        # Never constantize a name that is not a known model
        next unless PRELOAD_MODEL_WHITELIST.include?(model_name)

        record     = model_name.constantize.find_by(id: record_id)
        attachment = record&.public_send(attachment_name)

        progress.increment

        # Keep the file when the record exists and recognizes this file name
        next unless attachment.blank? || !attachment.variant?(file_name)

        begin
          # Measure before deleting; the size is unavailable afterwards
          size = File.size(path)

          unless options[:dry_run]
            File.delete(path)

            begin
              # Prune parent directories that became empty
              FileUtils.rmdir(File.dirname(path), parents: true)
            rescue Errno::ENOTEMPTY
              # OK
            end
          end

          reclaimed_bytes += size
          removed         += 1

          progress.log("Found and removed orphan: #{key}")
        rescue => e
          progress.log(pastel.red("Error processing #{key}: #{e}"))
        end
      end
    end

    # The bar was created without a total; set it to the final count before finishing
    progress.total = progress.progress
    progress.finish

    say("Removed #{removed} orphans (approx. #{number_to_human_size(reclaimed_bytes)})#{dry_run}", :green, true)
  end
  option :account, type: :string
  option :domain, type: :string
  option :status, type: :numeric
  option :concurrency, type: :numeric, default: 5, aliases: [:c]
  option :verbose, type: :boolean, default: false, aliases: [:v]
  option :dry_run, type: :boolean, default: false
  option :force, type: :boolean, default: false
  desc 'refresh', 'Fetch remote media files'
  long_desc <<~DESC
    Re-downloads media attachments from other servers. You must specify the
    source of media attachments with one of the following options:
    Use the --status option to download attachments from a specific status,
    using the status local numeric ID.
    Use the --account option to download attachments from a specific account,
    using username@domain handle of the account.
    Use the --domain option to download attachments from a specific domain.
    By default, attachments that are believed to be already downloaded will
    not be re-downloaded. To force re-download of every URL, use --force.
  DESC
  # Re-fetches remote media for the attachments selected by --status,
  # --account or --domain (checked in that order; the first one given wins).
  #
  # Attachments without a remote_url are skipped, as are attachments that
  # already have a file name unless --force is given. With --dry-run nothing
  # is assigned or saved.
  #
  # Exits with status 1 when no source option is given or the account
  # cannot be found.
  #
  # @return [void]
  def refresh
    dry_run = options[:dry_run] ? ' (DRY RUN)' : ''

    if options[:status]
      scope = MediaAttachment.where(status_id: options[:status])
    elsif options[:account]
      username, domain = options[:account].split('@')
      account = Account.find_remote(username, domain)

      if account.nil?
        say('No such account', :red)
        exit(1)
      end

      scope = MediaAttachment.where(account_id: account.id)
    elsif options[:domain]
      scope = MediaAttachment.joins(:account).merge(Account.by_domain_and_subdomains(options[:domain]))
    else
      # Explain the failure instead of exiting silently
      say('Specify the source of media attachments with --status, --account or --domain', :red)
      exit(1)
    end

    processed, aggregate = parallelize_with_progress(scope) do |media_attachment|
      next if media_attachment.remote_url.blank? || (!options[:force] && media_attachment.file_file_name.present?)

      unless options[:dry_run]
        # Assigning the remote URL and saving is what performs the download
        # — presumably via the model's Paperclip remote setter; confirm there.
        media_attachment.file_remote_url = media_attachment.remote_url
        media_attachment.save
      end

      media_attachment.file_file_size
    end

    say("Downloaded #{processed} media attachments (approx. #{number_to_human_size(aggregate)})#{dry_run}", :green, true)
  end
  desc 'usage', 'Calculate disk space consumed by Mastodon'
  # Prints the summed stored file size per upload category, one line each.
  # Where the models distinguish local records, the local share is shown too.
  #
  # @return [void]
  def usage
    attachments_total = number_to_human_size(MediaAttachment.sum(:file_file_size))
    attachments_local = number_to_human_size(MediaAttachment.where(account: Account.local).sum(:file_file_size))
    say("Attachments:\t#{attachments_total} (#{attachments_local} local)")

    emoji_total = number_to_human_size(CustomEmoji.sum(:image_file_size))
    emoji_local = number_to_human_size(CustomEmoji.local.sum(:image_file_size))
    say("Custom emoji:\t#{emoji_total} (#{emoji_local} local)")

    cards_total = number_to_human_size(PreviewCard.sum(:image_file_size))
    say("Preview cards:\t#{cards_total}")

    avatars_total = number_to_human_size(Account.sum(:avatar_file_size))
    avatars_local = number_to_human_size(Account.local.sum(:avatar_file_size))
    say("Avatars:\t#{avatars_total} (#{avatars_local} local)")

    headers_total = number_to_human_size(Account.sum(:header_file_size))
    headers_local = number_to_human_size(Account.local.sum(:header_file_size))
    say("Headers:\t#{headers_total} (#{headers_local} local)")

    backups_total = number_to_human_size(Backup.sum(:dump_file_size))
    say("Backups:\t#{backups_total}")

    imports_total = number_to_human_size(Import.sum(:data_file_size))
    say("Imports:\t#{imports_total}")

    settings_total = number_to_human_size(SiteUpload.sum(:file_file_size))
    say("Settings:\t#{settings_total}")
  end
  desc 'lookup URL', 'Lookup where media is displayed by passing a media URL'
  # Resolves a media file URL to the public URL of the record it belongs to
  # and prints it. When the owning record responds to `status`, the status
  # is used as the display target instead.
  #
  # Exits with status 1 (after printing the reason) when the URL is invalid,
  # is not shaped like a media path, names an unknown model, matches no
  # record, or the record has no public URL.
  #
  # @param url [String] URL of a stored media file
  # @return [void]
  def lookup(url)
    path = Addressable::URI.parse(url).path

    # Drop the first two path components. Slicing past the end of an array
    # yields nil (e.g. for a bare host or "/"), so fall back to an empty list
    # and let the size check below reject the URL.
    path_segments = path.split('/')[2..-1] || []
    path_segments.delete('cache')

    if path_segments.size != 7
      say('Not a media URL', :red)
      exit(1)
    end

    model_name = path_segments.first.classify
    # Only the id groups: exclude both the style and the file name
    record_id  = path_segments[2...-2].join.to_i

    # Never constantize a name that is not a known model
    unless PRELOAD_MODEL_WHITELIST.include?(model_name)
      say("Cannot find corresponding model: #{model_name}", :red)
      exit(1)
    end

    record = model_name.constantize.find_by(id: record_id)
    record = record.status if record.respond_to?(:status)

    unless record
      say('Cannot find corresponding record', :red)
      exit(1)
    end

    display_url = ActivityPub::TagManager.instance.url_for(record)

    if display_url.blank?
      say('No public URL for this type of record', :red)
      exit(1)
    end

    say(display_url, :blue)
  rescue Addressable::URI::InvalidURIError
    say('Invalid URL', :red)
    exit(1)
  end
  private

  # Class names that a storage path's first segment may resolve to. Callers
  # check membership before calling `constantize` on a name taken from a path.
  PRELOAD_MODEL_WHITELIST = %w[
    Account
    Backup
    CustomEmoji
    Import
    MediaAttachment
    PreviewCard
    SiteUpload
  ].freeze
  # Loads, with one query per model, the records referenced by a batch of
  # storage objects.
  #
  # Each object's key is split on '/', any 'cache' segment is dropped, and
  # keys that do not have exactly 7 segments or whose model is not in
  # PRELOAD_MODEL_WHITELIST are ignored.
  #
  # @param objects [Enumerable<#key>] storage objects whose key is a '/'-separated path
  # @return [Hash{String => Hash{Integer => Object}}] model name => (record id => record)
  def preload_records_from_mixed_objects(objects)
    preload_map = Hash.new { |hash, key| hash[key] = [] }

    # Iterated purely for its side effect on preload_map
    objects.each do |object|
      segments = object.key.split('/')
      segments.delete('cache')

      next if segments.size != 7

      model_name = segments.first.classify
      # Only the id groups: exclude both the style and the file name, so a
      # style name starting with a digit cannot corrupt the id
      record_id  = segments[2...-2].join.to_i

      next unless PRELOAD_MODEL_WHITELIST.include?(model_name)

      preload_map[model_name] << record_id
    end

    preload_map.each_with_object({}) do |(model_name, record_ids), model_map|
      records = model_name.constantize.where(id: record_ids)

      model_map[model_name] = records.each_with_object({}) { |record, by_id| by_id[record.id] = record }
    end
  end
  254. end
  255. end