You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

329 lines
12 KiB

  1. # frozen_string_literal: true
  2. require_relative '../../config/boot'
  3. require_relative '../../config/environment'
  4. require_relative 'cli_helper'
  5. module Mastodon
  6. class MediaCLI < Thor
  7. include ActionView::Helpers::NumberHelper
  8. include CLIHelper
  9. def self.exit_on_failure?
  10. true
  11. end
  # Thor metadata for the `remove` command defined right after this block.
  #
  # --days / -d    minimum age (in days) of cached attachments to purge; default 7.
  # --dry_run      when set, nothing is deleted and the summary is tagged as a dry run.
  # --concurrency / -c and --verbose / -v are not read by `remove` itself;
  #                presumably consumed by the progress/parallelism helper from
  #                CLIHelper — TODO confirm against cli_helper.
  option :days, type: :numeric, default: 7, aliases: [:d]
  option :concurrency, type: :numeric, default: 5, aliases: [:c]
  option :verbose, type: :boolean, default: false, aliases: [:v]
  option :dry_run, type: :boolean, default: false
  desc 'remove', 'Remove remote media files'
  long_desc <<-DESC
    Removes locally cached copies of media attachments from other servers.
    The --days option specifies how old media attachments have to be before
    they are removed. It defaults to 7 days.
  DESC
  # Deletes the locally stored file and thumbnail of cached remote media
  # attachments created more than --days days ago, then prints a summary.
  #
  # Reads options[:days] and options[:dry_run]. In dry-run mode nothing is
  # destroyed or saved, but the attachments are still counted and sized.
  def remove
    time_ago = options[:days].days.ago
    # Leading space lives in the suffix (as in the other commands) so a normal
    # run does not print a trailing space after the closing parenthesis.
    dry_run  = options[:dry_run] ? ' (DRY RUN)' : ''

    # The block's value is the attachment's size in bytes; `next` yields nil for
    # skipped attachments. The helper returns a [count, total] pair — presumably
    # the number of non-nil results and their sum; confirm against CLIHelper.
    processed, aggregate = parallelize_with_progress(MediaAttachment.cached.where.not(remote_url: '').where('created_at < ?', time_ago)) do |media_attachment|
      next if media_attachment.file.blank?

      # Either size column may be NULL; treat a missing value as zero bytes
      # instead of raising on nil + Integer.
      size = (media_attachment.file_file_size || 0) + (media_attachment.thumbnail_file_size || 0)

      unless options[:dry_run]
        media_attachment.file.destroy
        media_attachment.thumbnail.destroy
        media_attachment.save
      end

      size
    end

    say("Removed #{processed} media attachments (approx. #{number_to_human_size(aggregate)})#{dry_run}", :green, true)
  end
  # Thor metadata for the `remove-orphans` command defined right after this block.
  #
  # --start_after  S3 only: object key after which listing resumes.
  # --prefix       restricts the scan to keys (S3) or a sub-directory (filesystem)
  #                beginning with this value.
  # --dry_run      when set, nothing is deleted and the summary is tagged as a dry run.
  option :start_after
  option :prefix
  option :dry_run, type: :boolean, default: false
  desc 'remove-orphans', 'Scan storage and check for files that do not belong to existing media attachments'
  long_desc <<~LONG_DESC
    Scans file storage for files that do not belong to existing media attachments. Because this operation
    requires iterating over every single file individually, it will be slow.
    Please mind that some storage providers charge for the necessary API requests to list objects.
  LONG_DESC
  # Scans the configured Paperclip storage and deletes every file that is not
  # accounted for by an existing record's attachment, then prints a summary.
  #
  # A stored path is expected to look like
  #   <models>/<attachments>/<id split into 3 or 6 chunks>/<style>/<file name>
  # (7 or 10 segments once any "cache" segment is dropped). Anything else is
  # reported as unrecognized and left alone.
  #
  # Reads options[:dry_run], options[:prefix] and, for S3, options[:start_after].
  # The :fog driver is rejected with exit status 1.
  def remove_orphans
    progress        = create_progress_bar(nil)
    reclaimed_bytes = 0
    removed         = 0
    dry_run         = options[:dry_run] ? ' (DRY RUN)' : ''
    prefix          = options[:prefix]

    case Paperclip::Attachment.default_options[:storage]
    when :s3
      paperclip_instance = MediaAttachment.new.file
      s3_interface       = paperclip_instance.s3_interface
      bucket             = s3_interface.bucket(Paperclip::Attachment.default_options[:s3_credentials][:bucket])
      last_key           = options[:start_after]

      # Page through the bucket 1000 keys at a time, resuming after the last
      # key seen on the previous page.
      loop do
        objects = begin
          bucket.objects(start_after: last_key, prefix: prefix).limit(1000).to_a
        rescue => e
          progress.log(pastel.red("Error fetching list of files: #{e}"))
          progress.log("If you want to continue from this point, add --start-after=#{last_key} to your command") if last_key
          # Leaves the paging loop; the summary below is still printed.
          break
        end

        break if objects.empty?

        last_key   = objects.last.key
        # One query per model for the whole page instead of one per object.
        record_map = preload_records_from_mixed_objects(objects)

        objects.each do |object|
          path_segments = object.key.split('/')
          path_segments.delete('cache')

          unless [7, 10].include?(path_segments.size)
            progress.log(pastel.yellow("Unrecognized file found: #{object.key}"))
            next
          end

          model_name      = path_segments.first.classify
          attachment_name = path_segments[1].singularize
          # Only the id chunks: exclude both the style directory and the file name.
          record_id       = path_segments[2...-2].join.to_i
          file_name       = path_segments.last
          # NOTE(review): preload_records_from_mixed_objects skips models outside
          # PRELOAD_MODEL_WHITELIST, so for those this lookup is nil and the object
          # is treated as an orphan, whereas the filesystem branch below skips
          # such files. Confirm which behaviour is intended.
          record          = record_map.dig(model_name, record_id)
          attachment      = record&.public_send(attachment_name)

          progress.increment

          # Keep the file when its record exists and still lists this variant.
          next unless attachment.blank? || !attachment.variant?(file_name)

          begin
            object.delete unless options[:dry_run]

            reclaimed_bytes += object.size
            removed += 1

            progress.log("Found and removed orphan: #{object.key}")
          rescue => e
            progress.log(pastel.red("Error processing #{object.key}: #{e}"))
          end
        end
      end
    when :fog
      say('The fog storage driver is not supported for this operation at this time', :red)
      exit(1)
    when :filesystem
      # Loaded lazily: only this branch needs Find.
      require 'find'

      root_path = ENV.fetch('PAPERCLIP_ROOT_PATH', File.join(':rails_root', 'public', 'system')).gsub(':rails_root', Rails.root.to_s)

      Find.find(File.join(*[root_path, prefix].compact)) do |path|
        next if File.directory?(path)

        # Strip only the leading storage root; `sub` leaves any later
        # occurrence of the same text inside the path untouched.
        key = path.sub("#{root_path}#{File::SEPARATOR}", '')

        path_segments = key.split(File::SEPARATOR)
        path_segments.delete('cache')

        unless [7, 10].include?(path_segments.size)
          progress.log(pastel.yellow("Unrecognized file found: #{key}"))
          next
        end

        model_name      = path_segments.first.classify
        # Only the id chunks: exclude both the style directory and the file name.
        record_id       = path_segments[2...-2].join.to_i
        attachment_name = path_segments[1].singularize
        file_name       = path_segments.last

        # Never constantize a name that did not come from the known model list.
        next unless PRELOAD_MODEL_WHITELIST.include?(model_name)

        record     = model_name.constantize.find_by(id: record_id)
        attachment = record&.public_send(attachment_name)

        progress.increment

        # Keep the file when its record exists and still lists this variant.
        next unless attachment.blank? || !attachment.variant?(file_name)

        begin
          size = File.size(path)

          unless options[:dry_run]
            File.delete(path)

            begin
              # Prune directories left empty by the deletion, walking upwards.
              FileUtils.rmdir(File.dirname(path), parents: true)
            rescue Errno::ENOTEMPTY
              # OK
            end
          end

          reclaimed_bytes += size
          removed += 1

          progress.log("Found and removed orphan: #{key}")
        rescue => e
          progress.log(pastel.red("Error processing #{key}: #{e}"))
        end
      end
    end

    # The bar was created without a known total; pin it to what was actually seen.
    progress.total = progress.progress
    progress.finish

    say("Removed #{removed} orphans (approx. #{number_to_human_size(reclaimed_bytes)})#{dry_run}", :green, true)
  end
  # Thor metadata for the `refresh` command defined right after this block.
  #
  # --account / --domain / --status select which attachments to re-download;
  #                `refresh` checks them in the order status, account, domain.
  # --dry_run      when set, nothing is downloaded and the summary is tagged as a dry run.
  # --force        re-download even when a file name is already recorded.
  # --concurrency / -c and --verbose / -v are not read by `refresh` itself;
  #                presumably consumed by the progress/parallelism helper from
  #                CLIHelper — TODO confirm against cli_helper.
  option :account, type: :string
  option :domain, type: :string
  option :status, type: :numeric
  option :concurrency, type: :numeric, default: 5, aliases: [:c]
  option :verbose, type: :boolean, default: false, aliases: [:v]
  option :dry_run, type: :boolean, default: false
  option :force, type: :boolean, default: false
  desc 'refresh', 'Fetch remote media files'
  long_desc <<-DESC
    Re-downloads media attachments from other servers. You must specify the
    source of media attachments with one of the following options:
    Use the --status option to download attachments from a specific status,
    using the status local numeric ID.
    Use the --account option to download attachments from a specific account,
    using username@domain handle of the account.
    Use the --domain option to download attachments from a specific domain.
    By default, attachments that are believed to be already downloaded will
    not be re-downloaded. To force re-download of every URL, use --force.
  DESC
  # Re-downloads remote media attachments selected by --status, --account or
  # --domain (checked in that order) and prints a summary.
  #
  # Attachments without a remote URL are skipped, as are attachments that
  # already have a file name recorded unless --force is given. Exits with
  # status 1 when the account is unknown or no source option was supplied.
  def refresh
    dry_run = options[:dry_run] ? ' (DRY RUN)' : ''

    if options[:status]
      scope = MediaAttachment.where(status_id: options[:status])
    elsif options[:account]
      username, domain = options[:account].split('@')
      account = Account.find_remote(username, domain)

      if account.nil?
        say('No such account', :red)
        exit(1)
      end

      scope = MediaAttachment.where(account_id: account.id)
    elsif options[:domain]
      scope = MediaAttachment.joins(:account).merge(Account.by_domain_and_subdomains(options[:domain]))
    else
      # Explain the failure instead of exiting silently.
      say('Specify the source of media attachments with --status, --account or --domain', :red)
      exit(1)
    end

    # The block's value is the attachment's size in bytes; `next` yields nil for
    # skipped attachments. The helper returns a [count, total] pair — presumably
    # the number of non-nil results and their sum; confirm against CLIHelper.
    processed, aggregate = parallelize_with_progress(scope) do |media_attachment|
      next if media_attachment.remote_url.blank? || (!options[:force] && media_attachment.file_file_name.present?)

      unless options[:dry_run]
        media_attachment.reset_file!
        media_attachment.reset_thumbnail!
        media_attachment.save
      end

      # Sizes are nil when nothing has been downloaded (dry run, or a failed
      # fetch); count those as zero bytes rather than raising on nil + Integer.
      (media_attachment.file_file_size || 0) + (media_attachment.thumbnail_file_size || 0)
    end

    say("Downloaded #{processed} media attachments (approx. #{number_to_human_size(aggregate)})#{dry_run}", :green, true)
  end
  189. desc 'usage', 'Calculate disk space consumed by Mastodon'
  # Prints one line per kind of stored file with the summed size of its size
  # column(s); where a local scope is queried, the local share is appended.
  def usage
    attachment_bytes = 'COALESCE(file_file_size, 0) + COALESCE(thumbnail_file_size, 0)'

    # [label, total query, local query or nil]. Lambdas keep every query lazy so
    # each one runs immediately before its own line is printed.
    report = [
      ['Attachments', -> { MediaAttachment.sum(Arel.sql(attachment_bytes)) }, -> { MediaAttachment.where(account: Account.local).sum(Arel.sql(attachment_bytes)) }],
      ['Custom emoji', -> { CustomEmoji.sum(:image_file_size) }, -> { CustomEmoji.local.sum(:image_file_size) }],
      ['Preview cards', -> { PreviewCard.sum(:image_file_size) }, nil],
      ['Avatars', -> { Account.sum(:avatar_file_size) }, -> { Account.local.sum(:avatar_file_size) }],
      ['Headers', -> { Account.sum(:header_file_size) }, -> { Account.local.sum(:header_file_size) }],
      ['Backups', -> { Backup.sum(:dump_file_size) }, nil],
      ['Imports', -> { Import.sum(:data_file_size) }, nil],
      ['Settings', -> { SiteUpload.sum(:file_file_size) }, nil],
    ]

    report.each do |label, total_query, local_query|
      line = "#{label}:\t#{number_to_human_size(total_query.call)}"
      line = "#{line} (#{number_to_human_size(local_query.call)} local)" if local_query
      say(line)
    end
  end
  200. desc 'lookup URL', 'Lookup where media is displayed by passing a media URL'
  # Resolves a media file URL to the public URL of the record that displays it
  # and prints that URL. Every failure prints a red message and exits with
  # status 1: malformed URL, path that is not a media path, model outside
  # PRELOAD_MODEL_WHITELIST, missing record, or a record with no public URL.
  #
  # url - String URL of a stored media file.
  def lookup(url)
    path = Addressable::URI.parse(url).path

    # Drop the first two pieces of the split path (the empty string before the
    # leading slash and the storage mount directory). Slicing past the end of
    # the array yields nil for very short paths such as "" or "/", so fall back
    # to an empty list and let the size check below reject the URL.
    path_segments = path.split('/')[2..-1] || []
    path_segments.delete('cache')

    unless [7, 10].include?(path_segments.size)
      say('Not a media URL', :red)
      exit(1)
    end

    model_name = path_segments.first.classify
    # Only the id chunks: exclude both the style directory and the file name.
    record_id  = path_segments[2...-2].join.to_i

    # Never constantize a name that did not come from the known model list.
    unless PRELOAD_MODEL_WHITELIST.include?(model_name)
      say("Cannot find corresponding model: #{model_name}", :red)
      exit(1)
    end

    record = model_name.constantize.find_by(id: record_id)
    # Records that expose a status are swapped for that status before the URL
    # is built; the result may be nil, which the next check reports.
    record = record.status if record.respond_to?(:status)

    unless record
      say('Cannot find corresponding record', :red)
      exit(1)
    end

    display_url = ActivityPub::TagManager.instance.url_for(record)

    if display_url.blank?
      say('No public URL for this type of record', :red)
      exit(1)
    end

    say(display_url, :blue)
  rescue Addressable::URI::InvalidURIError
    say('Invalid URL', :red)
    exit(1)
  end
  231. private
  232. PRELOAD_MODEL_WHITELIST = %w(
  233. Account
  234. Backup
  235. CustomEmoji
  236. Import
  237. MediaAttachment
  238. PreviewCard
  239. SiteUpload
  240. ).freeze
  # Loads, with one query per model, the records referenced by a batch of
  # storage objects.
  #
  # objects - Enumerable of items responding to #key with a '/'-separated
  #           storage path (<models>/<attachments>/<id chunks>/<style>/<file>).
  #
  # Returns a Hash of model name => { record id => record }. Keys whose path
  # does not have 7 or 10 segments (ignoring "cache"), or whose model is not in
  # PRELOAD_MODEL_WHITELIST, are skipped; ids with no matching row are absent.
  def preload_records_from_mixed_objects(objects)
    preload_map = Hash.new { |hash, key| hash[key] = [] }

    # Pure side effect (filling preload_map), so iterate with each, not map.
    objects.each do |object|
      segments = object.key.split('/')
      segments.delete('cache')

      next unless [7, 10].include?(segments.size)

      model_name = segments.first.classify
      # Only the id chunks: exclude both the style directory and the file name.
      record_id  = segments[2...-2].join.to_i

      # Never constantize a name that did not come from the known model list.
      next unless PRELOAD_MODEL_WHITELIST.include?(model_name)

      preload_map[model_name] << record_id
    end

    preload_map.each_with_object({}) do |(model_name, record_ids), model_map|
      model_map[model_name] = model_name.constantize.where(id: record_ids).each_with_object({}) { |record, record_map| record_map[record.id] = record }
    end
  end
  256. end
  257. end