# frozen_string_literal: true

require_relative '../../config/boot'
require_relative '../../config/environment'
require_relative 'cli_helper'

module Mastodon
  class MediaCLI < Thor
    include ActionView::Helpers::NumberHelper
    include CLIHelper

    def self.exit_on_failure?
      true
    end
    option :days, type: :numeric, default: 7, aliases: [:d]
    option :concurrency, type: :numeric, default: 5, aliases: [:c]
    option :verbose, type: :boolean, default: false, aliases: [:v]
    option :dry_run, type: :boolean, default: false
    desc 'remove', 'Remove remote media files'
    long_desc <<-DESC
      Removes locally cached copies of media attachments from other servers.

      The --days option specifies how old media attachments have to be before
      they are removed. It defaults to 7 days.
    DESC
    def remove
      time_ago = options[:days].days.ago
      dry_run  = options[:dry_run] ? '(DRY RUN)' : ''

      processed, aggregate = parallelize_with_progress(MediaAttachment.cached.where.not(remote_url: '').where('created_at < ?', time_ago)) do |media_attachment|
        next if media_attachment.file.blank?

        size = media_attachment.file_file_size

        unless options[:dry_run]
          media_attachment.file.destroy
          media_attachment.save
        end

        size
      end

      say("Removed #{processed} media attachments (approx. #{number_to_human_size(aggregate)}) #{dry_run}", :green, true)
    end
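
    # Example invocation (a sketch; it assumes this class is wired up as the
    # `media` subcommand of tootctl, as in a standard Mastodon install):
    #
    #   bin/tootctl media remove --days=14 --concurrency=10 --dry-run
    #
    # The --dry-run flag reports what would be removed without touching storage.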
    option :start_after
    option :prefix
    option :dry_run, type: :boolean, default: false
    desc 'remove-orphans', 'Scan storage and check for files that do not belong to existing media attachments'
    long_desc <<~LONG_DESC
      Scans file storage for files that do not belong to existing media attachments. Because this operation
      requires iterating over every single file individually, it will be slow.

      Please mind that some storage providers charge for the necessary API requests to list objects.
    LONG_DESC
    def remove_orphans
      progress        = create_progress_bar(nil)
      reclaimed_bytes = 0
      removed         = 0
      dry_run         = options[:dry_run] ? ' (DRY RUN)' : ''
      prefix          = options[:prefix]

      case Paperclip::Attachment.default_options[:storage]
      when :s3
        paperclip_instance = MediaAttachment.new.file
        s3_interface       = paperclip_instance.s3_interface
        bucket             = s3_interface.bucket(Paperclip::Attachment.default_options[:s3_credentials][:bucket])
        last_key           = options[:start_after]

        loop do
          objects = begin
            bucket.objects(start_after: last_key, prefix: prefix).limit(1000).map { |x| x }
          rescue => e
            progress.log(pastel.red("Error fetching list of files: #{e}"))
            progress.log("If you want to continue from this point, add --start-after=#{last_key} to your command") if last_key
            break
          end

          break if objects.empty?

          last_key   = objects.last.key
          record_map = preload_records_from_mixed_objects(objects)

          objects.each do |object|
            # Derive the owning model, attachment name and record ID from the
            # object key, e.g. media_attachments/files/<id partition>/original/foo.png
            path_segments = object.key.split('/')
            path_segments.delete('cache')

            model_name      = path_segments.first.classify
            attachment_name = path_segments[1].singularize
            record_id       = path_segments[2..-2].join.to_i
            file_name       = path_segments.last
            record          = record_map.dig(model_name, record_id)
            attachment      = record&.public_send(attachment_name)

            progress.increment

            next unless attachment.blank? || !attachment.variant?(file_name)

            begin
              object.delete unless options[:dry_run]

              reclaimed_bytes += object.size
              removed         += 1

              progress.log("Found and removed orphan: #{object.key}")
            rescue => e
              progress.log(pastel.red("Error processing #{object.key}: #{e}"))
            end
          end
        end
      when :fog
        say('The fog storage driver is not supported for this operation at this time', :red)
        exit(1)
      when :filesystem
        require 'find'

        root_path = ENV.fetch('PAPERCLIP_ROOT_PATH', File.join(':rails_root', 'public', 'system')).gsub(':rails_root', Rails.root.to_s)

        Find.find(File.join(*[root_path, prefix].compact)) do |path|
          next if File.directory?(path)

          key = path.gsub("#{root_path}#{File::SEPARATOR}", '')

          path_segments = key.split(File::SEPARATOR)
          path_segments.delete('cache')

          model_name      = path_segments.first.classify
          record_id       = path_segments[2..-2].join.to_i
          attachment_name = path_segments[1].singularize
          file_name       = path_segments.last

          next unless PRELOAD_MODEL_WHITELIST.include?(model_name)

          record     = model_name.constantize.find_by(id: record_id)
          attachment = record&.public_send(attachment_name)

          progress.increment

          next unless attachment.blank? || !attachment.variant?(file_name)

          begin
            size = File.size(path)

            unless options[:dry_run]
              File.delete(path)

              begin
                FileUtils.rmdir(File.dirname(path), parents: true)
              rescue Errno::ENOTEMPTY
                # OK
              end
            end

            reclaimed_bytes += size
            removed         += 1

            progress.log("Found and removed orphan: #{key}")
          rescue => e
            progress.log(pastel.red("Error processing #{key}: #{e}"))
          end
        end
      end

      progress.total = progress.progress
      progress.finish

      say("Removed #{removed} orphans (approx. #{number_to_human_size(reclaimed_bytes)})#{dry_run}", :green, true)
    end
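
    # Example invocations (a sketch; assumes the standard tootctl wiring, and the
    # --prefix/--start-after values below are placeholders for your own storage keys):
    #
    #   bin/tootctl media remove-orphans --dry-run
    #   bin/tootctl media remove-orphans --prefix=media_attachments --start-after=media_attachments/files/000/000/123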
    option :account, type: :string
    option :domain, type: :string
    option :status, type: :numeric
    option :concurrency, type: :numeric, default: 5, aliases: [:c]
    option :verbose, type: :boolean, default: false, aliases: [:v]
    option :dry_run, type: :boolean, default: false
    option :force, type: :boolean, default: false
    desc 'refresh', 'Fetch remote media files'
    long_desc <<-DESC
      Re-downloads media attachments from other servers. You must specify the
      source of media attachments with one of the following options:

      Use the --status option to download attachments from a specific status,
      using the status local numeric ID.

      Use the --account option to download attachments from a specific account,
      using the username@domain handle of the account.

      Use the --domain option to download attachments from a specific domain.

      By default, attachments that are believed to be already downloaded will
      not be re-downloaded. To force re-download of every URL, use --force.
    DESC
    def refresh
      dry_run = options[:dry_run] ? ' (DRY RUN)' : ''

      if options[:status]
        scope = MediaAttachment.where(status_id: options[:status])
      elsif options[:account]
        username, domain = options[:account].split('@')
        account = Account.find_remote(username, domain)

        if account.nil?
          say('No such account', :red)
          exit(1)
        end

        scope = MediaAttachment.where(account_id: account.id)
      elsif options[:domain]
        scope = MediaAttachment.joins(:account).merge(Account.by_domain_and_subdomains(options[:domain]))
      else
        exit(1)
      end

      processed, aggregate = parallelize_with_progress(scope) do |media_attachment|
        next if media_attachment.remote_url.blank? || (!options[:force] && media_attachment.file_file_name.present?)

        unless options[:dry_run]
          media_attachment.reset_file!
          media_attachment.save
        end

        media_attachment.file_file_size
      end

      say("Downloaded #{processed} media attachments (approx. #{number_to_human_size(aggregate)})#{dry_run}", :green, true)
    end
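
    # Example invocations (a sketch; assumes the standard tootctl wiring, with
    # example.com standing in for a real remote domain or account handle):
    #
    #   bin/tootctl media refresh --domain=example.com --dry-run
    #   bin/tootctl media refresh --account=user@example.com --force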
    desc 'usage', 'Calculate disk space consumed by Mastodon'
    def usage
      say("Attachments:\t#{number_to_human_size(MediaAttachment.sum(:file_file_size))} (#{number_to_human_size(MediaAttachment.where(account: Account.local).sum(:file_file_size))} local)")
      say("Custom emoji:\t#{number_to_human_size(CustomEmoji.sum(:image_file_size))} (#{number_to_human_size(CustomEmoji.local.sum(:image_file_size))} local)")
      say("Preview cards:\t#{number_to_human_size(PreviewCard.sum(:image_file_size))}")
      say("Avatars:\t#{number_to_human_size(Account.sum(:avatar_file_size))} (#{number_to_human_size(Account.local.sum(:avatar_file_size))} local)")
      say("Headers:\t#{number_to_human_size(Account.sum(:header_file_size))} (#{number_to_human_size(Account.local.sum(:header_file_size))} local)")
      say("Backups:\t#{number_to_human_size(Backup.sum(:dump_file_size))}")
      say("Imports:\t#{number_to_human_size(Import.sum(:data_file_size))}")
      say("Settings:\t#{number_to_human_size(SiteUpload.sum(:file_file_size))}")
    end
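
    # Example invocation (a sketch; assumes the standard tootctl wiring):
    #
    #   bin/tootctl media usage
    #
    # Prints one line per storage category with its total size, and the share
    # belonging to local accounts where applicable.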
    desc 'lookup URL', 'Lookup where media is displayed by passing a media URL'
    def lookup(url)
      path = Addressable::URI.parse(url).path

      path_segments = path.split('/')[2..-1]
      path_segments.delete('cache')

      model_name = path_segments.first.classify
      record_id  = path_segments[2..-2].join.to_i

      unless PRELOAD_MODEL_WHITELIST.include?(model_name)
        say("Cannot find corresponding model: #{model_name}", :red)
        exit(1)
      end

      record = model_name.constantize.find_by(id: record_id)
      record = record.status if record.respond_to?(:status)

      unless record
        say('Cannot find corresponding record', :red)
        exit(1)
      end

      display_url = ActivityPub::TagManager.instance.url_for(record)

      if display_url.blank?
        say('No public URL for this type of record', :red)
        exit(1)
      end

      say(display_url, :blue)
    rescue Addressable::URI::InvalidURIError
      say('Invalid URL', :red)
      exit(1)
    end
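
    # Example invocation (a sketch; assumes the standard tootctl wiring, and the
    # URL below is a hypothetical media file path on your own instance):
    #
    #   bin/tootctl media lookup https://example.com/system/media_attachments/files/000/000/123/original/image.png
    #
    # On success, prints the public URL of the status (or other record) that the
    # file belongs to.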

    private

    PRELOAD_MODEL_WHITELIST = %w(
      Account
      Backup
      CustomEmoji
      Import
      MediaAttachment
      PreviewCard
      SiteUpload
    ).freeze

    # Batch-loads the records referenced by a mixed list of storage objects and
    # returns them as a nested hash of model name => { id => record }.
    def preload_records_from_mixed_objects(objects)
      preload_map = Hash.new { |hash, key| hash[key] = [] }

      objects.map do |object|
        segments = object.key.split('/')
        segments.delete('cache')

        model_name = segments.first.classify
        record_id  = segments[2..-2].join.to_i

        next unless PRELOAD_MODEL_WHITELIST.include?(model_name)

        preload_map[model_name] << record_id
      end

      preload_map.each_with_object({}) do |(model_name, record_ids), model_map|
        model_map[model_name] = model_name.constantize.where(id: record_ids).each_with_object({}) { |record, record_map| record_map[record.id] = record }
      end
    end
  end
end