You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

332 lines
12 KiB

  1. # frozen_string_literal: true
  2. require_relative '../../config/boot'
  3. require_relative '../../config/environment'
  4. require_relative 'cli_helper'
  5. module Mastodon
  6. class MediaCLI < Thor
  7. include ActionView::Helpers::NumberHelper
  8. include CLIHelper
  # Thor hook: answering true asks Thor to end the process with a failing
  # exit status when a command errors out.
  def self.exit_on_failure?
    true
  end
  option :days, type: :numeric, default: 7, aliases: [:d]
  option :concurrency, type: :numeric, default: 5, aliases: [:c]
  option :verbose, type: :boolean, default: false, aliases: [:v]
  option :dry_run, type: :boolean, default: false
  desc 'remove', 'Remove remote media files'
  long_desc <<~DESC
    Removes locally cached copies of media attachments from other servers.
    The --days option specifies how old media attachments have to be before
    they are removed. It defaults to 7 days.
  DESC
  # Destroys the stored file and thumbnail of every cached media attachment
  # that has a remote URL and was created more than --days days ago, then
  # prints the processed count and aggregate size handed back by
  # parallelize_with_progress.
  #
  # With --dry-run nothing is destroyed or saved; sizes are still tallied so
  # the summary shows what a real run would reclaim.
  def remove
    time_ago = options[:days].days.ago

    # The leading space lives in the suffix (as in the other commands of this
    # class) so a real run does not print trailing whitespace.
    dry_run = options[:dry_run] ? ' (DRY RUN)' : ''

    scope = MediaAttachment.cached.where.not(remote_url: '').where('created_at < ?', time_ago)

    processed, aggregate = parallelize_with_progress(scope) do |media_attachment|
      next if media_attachment.file.blank?

      # Either size column may be nil, so default each to zero before adding.
      size = (media_attachment.file_file_size || 0) + (media_attachment.thumbnail_file_size || 0)

      unless options[:dry_run]
        media_attachment.file.destroy
        media_attachment.thumbnail.destroy
        media_attachment.save
      end

      size
    end

    say("Removed #{processed} media attachments (approx. #{number_to_human_size(aggregate)})#{dry_run}", :green, true)
  end
  option :start_after
  option :prefix
  option :fix_permissions, type: :boolean, default: false
  option :dry_run, type: :boolean, default: false
  desc 'remove-orphans', 'Scan storage and check for files that do not belong to existing media attachments'
  long_desc <<~LONG_DESC
    Scans file storage for files that do not belong to existing media attachments. Because this operation
    requires iterating over every single file individually, it will be slow.
    Please mind that some storage providers charge for the necessary API requests to list objects.
  LONG_DESC
  # Walks the configured Paperclip storage and deletes every file whose path
  # does not resolve to an existing record with a matching attachment variant.
  #
  # A path is only considered when it has 7 or 10 segments after any `cache`
  # segment is dropped. Segment 0 is classified into the model name, segment 1
  # is singularized into the attachment name, segments 2..-2 are joined and
  # converted with to_i to get the record ID, and the last segment is the
  # file name. Anything else is logged as unrecognized and left alone.
  #
  # Options read here: --prefix narrows the listing, --start-after resumes an
  # S3 listing from a key, --fix-permissions sets the public-read ACL on each
  # listed S3 object, and --dry-run skips deletion and ACL changes while still
  # counting and logging what would be removed.
  #
  # NOTE(review): the two backends treat models outside PRELOAD_MODEL_WHITELIST
  # differently. The filesystem branch skips such files; the S3 branch finds no
  # preloaded record for them and therefore treats them as orphans. Confirm
  # whether that asymmetry is intended.
  def remove_orphans
    progress        = create_progress_bar(nil)
    reclaimed_bytes = 0
    removed         = 0
    dry_run         = options[:dry_run] ? ' (DRY RUN)' : ''
    prefix          = options[:prefix]

    case Paperclip::Attachment.default_options[:storage]
    when :s3
      paperclip_instance = MediaAttachment.new.file
      s3_interface       = paperclip_instance.s3_interface
      bucket             = s3_interface.bucket(Paperclip::Attachment.default_options[:s3_credentials][:bucket])
      last_key           = options[:start_after]

      loop do
        # Fetch one page of at most 1000 objects; on failure, tell the operator
        # how to resume and stop scanning.
        objects = begin
          bucket.objects(start_after: last_key, prefix: prefix).limit(1000).to_a
        rescue => e
          progress.log(pastel.red("Error fetching list of files: #{e}"))
          progress.log("If you want to continue from this point, add --start-after=#{last_key} to your command") if last_key
          break
        end

        break if objects.empty?

        last_key   = objects.last.key
        # One query per model for the whole page instead of one per object.
        record_map = preload_records_from_mixed_objects(objects)

        objects.each do |object|
          object.acl.put(acl: 'public-read') if options[:fix_permissions] && !options[:dry_run]

          path_segments = object.key.split('/')
          path_segments.delete('cache')

          unless [7, 10].include?(path_segments.size)
            progress.log(pastel.yellow("Unrecognized file found: #{object.key}"))
            next
          end

          model_name      = path_segments.first.classify
          attachment_name = path_segments[1].singularize
          record_id       = path_segments[2..-2].join.to_i
          file_name       = path_segments.last
          record          = record_map.dig(model_name, record_id)
          attachment      = record&.public_send(attachment_name)

          progress.increment

          # Keep the file when the record has this attachment and the file name
          # is one of its variants.
          next unless attachment.blank? || !attachment.variant?(file_name)

          begin
            object.delete unless options[:dry_run]

            reclaimed_bytes += object.size
            removed += 1

            progress.log("Found and removed orphan: #{object.key}")
          rescue => e
            progress.log(pastel.red("Error processing #{object.key}: #{e}"))
          end
        end
      end
    when :fog
      say('The fog storage driver is not supported for this operation at this time', :red)
      exit(1)
    when :filesystem
      require 'find'

      root_path = ENV.fetch('PAPERCLIP_ROOT_PATH', File.join(':rails_root', 'public', 'system')).gsub(':rails_root', Rails.root.to_s)

      # Anchored at the start so only the leading root directory is stripped,
      # even if the same string reappears deeper in the path.
      root_prefix = /\A#{Regexp.escape("#{root_path}#{File::SEPARATOR}")}/

      Find.find(File.join(*[root_path, prefix].compact)) do |path|
        next if File.directory?(path)

        key = path.sub(root_prefix, '')

        path_segments = key.split(File::SEPARATOR)
        path_segments.delete('cache')

        unless [7, 10].include?(path_segments.size)
          progress.log(pastel.yellow("Unrecognized file found: #{key}"))
          next
        end

        model_name      = path_segments.first.classify
        record_id       = path_segments[2..-2].join.to_i
        attachment_name = path_segments[1].singularize
        file_name       = path_segments.last

        next unless PRELOAD_MODEL_WHITELIST.include?(model_name)

        record     = model_name.constantize.find_by(id: record_id)
        attachment = record&.public_send(attachment_name)

        progress.increment

        next unless attachment.blank? || !attachment.variant?(file_name)

        begin
          size = File.size(path)

          unless options[:dry_run]
            File.delete(path)

            begin
              # Prune directories left empty by the deletion, walking upwards.
              FileUtils.rmdir(File.dirname(path), parents: true)
            rescue Errno::ENOTEMPTY
              # OK: a parent still holds other files.
            end
          end

          reclaimed_bytes += size
          removed += 1

          progress.log("Found and removed orphan: #{key}")
        rescue => e
          progress.log(pastel.red("Error processing #{key}: #{e}"))
        end
      end
    end

    # The bar was created without a total; set it to the count reached so it
    # can finish cleanly.
    progress.total = progress.progress
    progress.finish

    say("Removed #{removed} orphans (approx. #{number_to_human_size(reclaimed_bytes)})#{dry_run}", :green, true)
  end
  option :account, type: :string
  option :domain, type: :string
  option :status, type: :numeric
  option :concurrency, type: :numeric, default: 5, aliases: [:c]
  option :verbose, type: :boolean, default: false, aliases: [:v]
  option :dry_run, type: :boolean, default: false
  option :force, type: :boolean, default: false
  desc 'refresh', 'Fetch remote media files'
  long_desc <<~DESC
    Re-downloads media attachments from other servers. You must specify the
    source of media attachments with one of the following options:
    Use the --status option to download attachments from a specific status,
    using the status local numeric ID.
    Use the --account option to download attachments from a specific account,
    using username@domain handle of the account.
    Use the --domain option to download attachments from a specific domain.
    By default, attachments that are believed to be already downloaded will
    not be re-downloaded. To force re-download of every URL, use --force.
  DESC
  # Re-fetches remote media for the attachments selected by --status,
  # --account or --domain (checked in that order) and prints a summary.
  #
  # Attachments without a remote URL are skipped, as are attachments that
  # already have a file name unless --force is given. With --dry-run nothing
  # is reset or saved. Exits with status 1 when the account cannot be found
  # or when no source option was supplied.
  def refresh
    dry_run = options[:dry_run] ? ' (DRY RUN)' : ''

    if options[:status]
      scope = MediaAttachment.where(status_id: options[:status])
    elsif options[:account]
      username, domain = options[:account].split('@')
      account = Account.find_remote(username, domain)

      if account.nil?
        say('No such account', :red)
        exit(1)
      end

      scope = MediaAttachment.where(account_id: account.id)
    elsif options[:domain]
      scope = MediaAttachment.joins(:account).merge(Account.by_domain_and_subdomains(options[:domain]))
    else
      # Explain the failure instead of exiting silently.
      say('Specify the source of media attachments with --status, --account or --domain', :red)
      exit(1)
    end

    processed, aggregate = parallelize_with_progress(scope) do |media_attachment|
      next if media_attachment.remote_url.blank? || (!options[:force] && media_attachment.file_file_name.present?)

      unless options[:dry_run]
        media_attachment.reset_file!
        media_attachment.reset_thumbnail!
        media_attachment.save
      end

      # file_file_size can be nil (e.g. on a dry run over media that was never
      # downloaded), so default both sizes to zero before adding.
      (media_attachment.file_file_size || 0) + (media_attachment.thumbnail_file_size || 0)
    end

    say("Downloaded #{processed} media attachments (approx. #{number_to_human_size(aggregate)})#{dry_run}", :green, true)
  end
  desc 'usage', 'Calculate disk space consumed by Mastodon'
  # Prints one line per storage category with the summed size columns, adding
  # a "local" subtotal for the categories that have one.
  def usage
    # File plus thumbnail, each treated as zero when NULL.
    attachment_size_sql = Arel.sql('COALESCE(file_file_size, 0) + COALESCE(thumbnail_file_size, 0)')

    attachments_all   = number_to_human_size(MediaAttachment.sum(attachment_size_sql))
    attachments_local = number_to_human_size(MediaAttachment.where(account: Account.local).sum(attachment_size_sql))
    say("Attachments:\t#{attachments_all} (#{attachments_local} local)")

    emoji_all   = number_to_human_size(CustomEmoji.sum(:image_file_size))
    emoji_local = number_to_human_size(CustomEmoji.local.sum(:image_file_size))
    say("Custom emoji:\t#{emoji_all} (#{emoji_local} local)")

    preview_cards = number_to_human_size(PreviewCard.sum(:image_file_size))
    say("Preview cards:\t#{preview_cards}")

    avatars_all   = number_to_human_size(Account.sum(:avatar_file_size))
    avatars_local = number_to_human_size(Account.local.sum(:avatar_file_size))
    say("Avatars:\t#{avatars_all} (#{avatars_local} local)")

    headers_all   = number_to_human_size(Account.sum(:header_file_size))
    headers_local = number_to_human_size(Account.local.sum(:header_file_size))
    say("Headers:\t#{headers_all} (#{headers_local} local)")

    backups = number_to_human_size(Backup.sum(:dump_file_size))
    say("Backups:\t#{backups}")

    imports = number_to_human_size(Import.sum(:data_file_size))
    say("Imports:\t#{imports}")

    settings = number_to_human_size(SiteUpload.sum(:file_file_size))
    say("Settings:\t#{settings}")
  end
  desc 'lookup URL', 'Lookup where media is displayed by passing a media URL'
  # Resolves a media file URL to the public URL of the record that displays
  # it and prints that URL.
  #
  # The URL path is parsed the same way as storage keys elsewhere in this
  # class: after dropping the first two split segments and any `cache`
  # segment, it must have 7 or 10 segments. When the found record responds to
  # `status`, that status is looked up instead.
  #
  # Exits with status 1, after printing a reason, when the URL is invalid, is
  # not shaped like a media path, names a model outside
  # PRELOAD_MODEL_WHITELIST, has no matching record, or has no public URL.
  def lookup(url)
    path = Addressable::URI.parse(url).path

    # drop(2) discards the empty segment before the leading slash and the
    # first path component (presumably the storage root -- confirm). Unlike
    # [2..-1] it returns [] rather than nil for shorter paths, so those fall
    # through to the "Not a media URL" check instead of raising.
    path_segments = path.split('/').drop(2)
    path_segments.delete('cache')

    unless [7, 10].include?(path_segments.size)
      say('Not a media URL', :red)
      exit(1)
    end

    model_name = path_segments.first.classify
    record_id  = path_segments[2..-2].join.to_i

    unless PRELOAD_MODEL_WHITELIST.include?(model_name)
      say("Cannot find corresponding model: #{model_name}", :red)
      exit(1)
    end

    record = model_name.constantize.find_by(id: record_id)
    record = record.status if record.respond_to?(:status)

    unless record
      say('Cannot find corresponding record', :red)
      exit(1)
    end

    display_url = ActivityPub::TagManager.instance.url_for(record)

    if display_url.blank?
      say('No public URL for this type of record', :red)
      exit(1)
    end

    say(display_url, :blue)
  rescue Addressable::URI::InvalidURIError
    say('Invalid URL', :red)
    exit(1)
  end
  private

  # Model class names that a storage path is allowed to resolve to. Paths whose
  # first segment classifies to anything else are never constantized.
  PRELOAD_MODEL_WHITELIST = %w(Account Backup CustomEmoji Import MediaAttachment PreviewCard SiteUpload).freeze
  # Loads the database records referenced by a batch of storage objects with
  # one query per model instead of one per object.
  #
  # Each object's key is split on '/', any `cache` segment is dropped, and
  # keys that do not have 7 or 10 segments, or whose classified first segment
  # is not in PRELOAD_MODEL_WHITELIST, are ignored.
  #
  # @param objects [Enumerable] items responding to `key` with a String path
  # @return [Hash{String => Hash}] model name => { record id => record }
  def preload_records_from_mixed_objects(objects)
    ids_by_model = Hash.new { |hash, key| hash[key] = [] }

    # Pure side-effect pass, so `each` rather than `map`.
    objects.each do |object|
      segments = object.key.split('/')
      segments.delete('cache')

      next unless [7, 10].include?(segments.size)

      model_name = segments.first.classify
      record_id  = segments[2..-2].join.to_i

      next unless PRELOAD_MODEL_WHITELIST.include?(model_name)

      ids_by_model[model_name] << record_id
    end

    ids_by_model.each_with_object({}) do |(model_name, record_ids), model_map|
      # Several files can point at the same record, so de-duplicate the IDs
      # before building the query.
      records = model_name.constantize.where(id: record_ids.uniq)

      model_map[model_name] = records.each_with_object({}) { |record, by_id| by_id[record.id] = record }
    end
  end
  258. end
  259. end