You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

334 lines
12 KiB

  1. # frozen_string_literal: true
  2. require_relative '../../config/boot'
  3. require_relative '../../config/environment'
  4. require_relative 'cli_helper'
  5. module Mastodon
  6. class MediaCLI < Thor
  7. include ActionView::Helpers::NumberHelper
  8. include CLIHelper
  class << self
    # Hook queried by Thor; always answers true.
    # NOTE(review): per Thor's documentation a true value makes failed
    # commands exit with a non-zero status - confirm against the Thor version in use.
    def exit_on_failure?
      true
    end
  end
  option :days, type: :numeric, default: 7, aliases: [:d]
  option :concurrency, type: :numeric, default: 5, aliases: [:c]
  option :verbose, type: :boolean, default: false, aliases: [:v]
  option :dry_run, type: :boolean, default: false
  desc 'remove', 'Remove remote media files'
  long_desc <<~DESC
    Removes locally cached copies of media attachments from other servers.
    The --days option specifies how old media attachments have to be before
    they are removed. It defaults to 7 days.
  DESC
  # Destroys the stored file and thumbnail of every cached media attachment
  # that has a non-empty remote_url and was created more than --days days ago,
  # then prints how many attachments were processed and their approximate size.
  #
  # With --dry-run nothing is destroyed or saved; sizes are still counted.
  def remove
    time_ago = options[:days].days.ago
    # The suffix carries its own leading space (as in the other commands of
    # this class) so a real run does not print a trailing space.
    dry_run  = options[:dry_run] ? ' (DRY RUN)' : ''

    # NOTE(review): parallelize_with_progress is defined outside this file;
    # presumably it returns the processed count and the sum of the block's
    # return values - confirm against CLIHelper.
    processed, aggregate = parallelize_with_progress(MediaAttachment.cached.where.not(remote_url: '').where('created_at < ?', time_ago)) do |media_attachment|
      next if media_attachment.file.blank?

      # Either size column may be nil, so default both to zero.
      size = (media_attachment.file_file_size || 0) + (media_attachment.thumbnail_file_size || 0)

      unless options[:dry_run]
        media_attachment.file.destroy
        media_attachment.thumbnail.destroy
        media_attachment.save
      end

      size
    end

    say("Removed #{processed} media attachments (approx. #{number_to_human_size(aggregate)})#{dry_run}", :green, true)
  end
  option :start_after
  option :prefix
  option :fix_permissions, type: :boolean, default: false
  option :dry_run, type: :boolean, default: false
  desc 'remove-orphans', 'Scan storage and check for files that do not belong to existing media attachments'
  long_desc <<~LONG_DESC
    Scans file storage for files that do not belong to existing media attachments. Because this operation
    requires iterating over every single file individually, it will be slow.
    Please mind that some storage providers charge for the necessary API requests to list objects.
  LONG_DESC
  # Walks the configured Paperclip storage (S3 or local filesystem), resolves
  # each stored file back to a record and attachment, and deletes files for
  # which no matching attachment variant exists. Prints a summary at the end.
  #
  # Options read: --prefix (limits the scan), --start-after (S3 only, resume
  # key), --fix-permissions (S3 only, re-applies the configured ACL) and
  # --dry-run (count only, delete nothing). The fog driver is rejected.
  def remove_orphans
    progress        = create_progress_bar(nil)
    reclaimed_bytes = 0
    removed         = 0
    dry_run         = options[:dry_run] ? ' (DRY RUN)' : ''
    prefix          = options[:prefix]

    case Paperclip::Attachment.default_options[:storage]
    when :s3
      paperclip_instance = MediaAttachment.new.file
      s3_interface       = paperclip_instance.s3_interface
      s3_permissions     = Paperclip::Attachment.default_options[:s3_permissions]
      bucket             = s3_interface.bucket(Paperclip::Attachment.default_options[:s3_credentials][:bucket])
      last_key           = options[:start_after]

      loop do
        # Fetch the next page of at most 1000 objects after last_key. On a
        # listing error, tell the operator how to resume and stop scanning.
        objects = begin
          bucket.objects(start_after: last_key, prefix: prefix).limit(1000).to_a
        rescue => e
          progress.log(pastel.red("Error fetching list of files: #{e}"))
          progress.log("If you want to continue from this point, add --start-after=#{last_key} to your command") if last_key
          break
        end

        break if objects.empty?

        last_key   = objects.last.key
        # One lookup per model for the whole page instead of one per object.
        record_map = preload_records_from_mixed_objects(objects)

        objects.each do |object|
          object.acl.put(acl: s3_permissions) if options[:fix_permissions] && !options[:dry_run]

          path_segments = object.key.split('/')
          path_segments.delete('cache')

          # Only keys with exactly 7 or 10 segments (ignoring "cache") are
          # treated as attachment paths.
          unless [7, 10].include?(path_segments.size)
            progress.log(pastel.yellow("Unrecognized file found: #{object.key}"))
            next
          end

          model_name      = path_segments.first.classify
          attachment_name = path_segments[1].singularize
          # String#to_i stops at the first non-digit, so the trailing
          # non-numeric segment in the joined string is ignored.
          record_id       = path_segments[2..-2].join.to_i
          file_name       = path_segments.last
          # NOTE(review): record_map only holds models from
          # PRELOAD_MODEL_WHITELIST, so a key naming any other model yields a
          # nil record here and is deleted as an orphan, whereas the filesystem
          # branch below skips such files. Confirm this difference is intended.
          record          = record_map.dig(model_name, record_id)
          attachment      = record&.public_send(attachment_name)

          progress.increment

          next unless attachment.blank? || !attachment.variant?(file_name)

          begin
            object.delete unless options[:dry_run]

            reclaimed_bytes += object.size
            removed += 1

            progress.log("Found and removed orphan: #{object.key}")
          rescue => e
            progress.log(pastel.red("Error processing #{object.key}: #{e}"))
          end
        end
      end
    when :fog
      say('The fog storage driver is not supported for this operation at this time', :red)
      exit(1)
    when :filesystem
      require 'find'

      root_path = ENV.fetch('PAPERCLIP_ROOT_PATH', File.join(':rails_root', 'public', 'system')).gsub(':rails_root', Rails.root.to_s)

      Find.find(File.join(*[root_path, prefix].compact)) do |path|
        next if File.directory?(path)

        # Strip only the first occurrence, i.e. the leading storage root; a
        # later occurrence of the same text inside the path must be kept.
        key = path.sub("#{root_path}#{File::SEPARATOR}", '')

        path_segments = key.split(File::SEPARATOR)
        path_segments.delete('cache')

        unless [7, 10].include?(path_segments.size)
          progress.log(pastel.yellow("Unrecognized file found: #{key}"))
          next
        end

        model_name      = path_segments.first.classify
        record_id       = path_segments[2..-2].join.to_i
        attachment_name = path_segments[1].singularize
        file_name       = path_segments.last

        # Never constantize a name that is not an expected model.
        next unless PRELOAD_MODEL_WHITELIST.include?(model_name)

        record     = model_name.constantize.find_by(id: record_id)
        attachment = record&.public_send(attachment_name)

        progress.increment

        next unless attachment.blank? || !attachment.variant?(file_name)

        begin
          size = File.size(path)

          unless options[:dry_run]
            File.delete(path)

            begin
              # Prune directories that became empty, walking up the tree.
              FileUtils.rmdir(File.dirname(path), parents: true)
            rescue Errno::ENOTEMPTY
              # OK
            end
          end

          reclaimed_bytes += size
          removed += 1

          progress.log("Found and removed orphan: #{key}")
        rescue => e
          progress.log(pastel.red("Error processing #{key}: #{e}"))
        end
      end
    end

    # The bar was created without a total; set it to the final count before finishing.
    progress.total = progress.progress
    progress.finish

    say("Removed #{removed} orphans (approx. #{number_to_human_size(reclaimed_bytes)})#{dry_run}", :green, true)
  end
  option :account, type: :string
  option :domain, type: :string
  option :status, type: :numeric
  option :concurrency, type: :numeric, default: 5, aliases: [:c]
  option :verbose, type: :boolean, default: false, aliases: [:v]
  option :dry_run, type: :boolean, default: false
  option :force, type: :boolean, default: false
  desc 'refresh', 'Fetch remote media files'
  long_desc <<~DESC
    Re-downloads media attachments from other servers. You must specify the
    source of media attachments with one of the following options:
    Use the --status option to download attachments from a specific status,
    using the status local numeric ID.
    Use the --account option to download attachments from a specific account,
    using username@domain handle of the account.
    Use the --domain option to download attachments from a specific domain.
    By default, attachments that are believed to be already downloaded will
    not be re-downloaded. To force re-download of every URL, use --force.
  DESC
  # Re-fetches remote media attachments selected by --status, --account or
  # --domain (checked in that order) and prints a summary.
  #
  # Exits with status 1 when the account cannot be found or when no source
  # option was given. Attachments without a remote_url, attachments that
  # already have a file name (unless --force), and attachments from domains
  # for which DomainBlock.reject_media? is true are skipped.
  def refresh
    dry_run = options[:dry_run] ? ' (DRY RUN)' : ''

    if options[:status]
      scope = MediaAttachment.where(status_id: options[:status])
    elsif options[:account]
      username, domain = options[:account].split('@')
      account = Account.find_remote(username, domain)

      if account.nil?
        say('No such account', :red)
        exit(1)
      end

      scope = MediaAttachment.where(account_id: account.id)
    elsif options[:domain]
      scope = MediaAttachment.joins(:account).merge(Account.by_domain_and_subdomains(options[:domain]))
    else
      # Explain the failure instead of exiting silently.
      say('Specify the source of media attachments with --status, --account or --domain', :red)
      exit(1)
    end

    processed, aggregate = parallelize_with_progress(scope) do |media_attachment|
      next if media_attachment.remote_url.blank? || (!options[:force] && media_attachment.file_file_name.present?)
      next if DomainBlock.reject_media?(media_attachment.account.domain)

      unless options[:dry_run]
        media_attachment.reset_file!
        media_attachment.reset_thumbnail!
        media_attachment.save
      end

      # file_file_size can be nil (for example on a dry run over attachments
      # that were never downloaded), so default both sizes to zero.
      (media_attachment.file_file_size || 0) + (media_attachment.thumbnail_file_size || 0)
    end

    say("Downloaded #{processed} media attachments (approx. #{number_to_human_size(aggregate)})#{dry_run}", :green, true)
  end
  desc 'usage', 'Calculate disk space consumed by Mastodon'
  # Prints one line per storage category with the human-readable sum of the
  # recorded file sizes, plus the local-only share where one is computed.
  def usage
    attachment_size_sql = 'COALESCE(file_file_size, 0) + COALESCE(thumbnail_file_size, 0)'

    attachments_all   = number_to_human_size(MediaAttachment.sum(Arel.sql(attachment_size_sql)))
    attachments_local = number_to_human_size(MediaAttachment.where(account: Account.local).sum(Arel.sql(attachment_size_sql)))
    say("Attachments:\t#{attachments_all} (#{attachments_local} local)")

    emoji_all   = number_to_human_size(CustomEmoji.sum(:image_file_size))
    emoji_local = number_to_human_size(CustomEmoji.local.sum(:image_file_size))
    say("Custom emoji:\t#{emoji_all} (#{emoji_local} local)")

    preview_cards = number_to_human_size(PreviewCard.sum(:image_file_size))
    say("Preview cards:\t#{preview_cards}")

    avatars_all   = number_to_human_size(Account.sum(:avatar_file_size))
    avatars_local = number_to_human_size(Account.local.sum(:avatar_file_size))
    say("Avatars:\t#{avatars_all} (#{avatars_local} local)")

    headers_all   = number_to_human_size(Account.sum(:header_file_size))
    headers_local = number_to_human_size(Account.local.sum(:header_file_size))
    say("Headers:\t#{headers_all} (#{headers_local} local)")

    backups = number_to_human_size(Backup.sum(:dump_file_size))
    say("Backups:\t#{backups}")

    imports = number_to_human_size(Import.sum(:data_file_size))
    say("Imports:\t#{imports}")

    settings = number_to_human_size(SiteUpload.sum(:file_file_size))
    say("Settings:\t#{settings}")
  end
  desc 'lookup URL', 'Lookup where media is displayed by passing a media URL'
  # Resolves a media file URL to the public URL of the record that displays it
  # and prints that URL.
  #
  # url - the media URL as a String.
  #
  # Exits with status 1 (after printing a message) when the URL is invalid,
  # is not a recognized media path, names a model outside
  # PRELOAD_MODEL_WHITELIST, has no matching record, or the record has no
  # public URL.
  def lookup(url)
    path = Addressable::URI.parse(url).path

    # Skip the first two elements produced by the split. drop(2) always
    # returns an array, whereas [2..-1] is nil for an empty or root path and
    # would raise NoMethodError on the next line.
    path_segments = path.split('/').drop(2)
    path_segments.delete('cache')

    unless [7, 10].include?(path_segments.size)
      say('Not a media URL', :red)
      exit(1)
    end

    model_name = path_segments.first.classify
    # String#to_i stops at the first non-digit, so the trailing non-numeric
    # segment in the joined string is ignored.
    record_id  = path_segments[2..-2].join.to_i

    # Never constantize a name that is not an expected model.
    unless PRELOAD_MODEL_WHITELIST.include?(model_name)
      say("Cannot find corresponding model: #{model_name}", :red)
      exit(1)
    end

    record = model_name.constantize.find_by(id: record_id)
    # Records that expose a status are replaced by that status (which may be nil).
    record = record.status if record.respond_to?(:status)

    unless record
      say('Cannot find corresponding record', :red)
      exit(1)
    end

    display_url = ActivityPub::TagManager.instance.url_for(record)

    if display_url.blank?
      say('No public URL for this type of record', :red)
      exit(1)
    end

    say(display_url, :blue)
  rescue Addressable::URI::InvalidURIError
    say('Invalid URL', :red)
    exit(1)
  end
  private

  # Model class names that a storage path or media URL may resolve to. Code in
  # this class checks membership before calling constantize on a derived name.
  PRELOAD_MODEL_WHITELIST = %w(Account Backup CustomEmoji Import MediaAttachment PreviewCard SiteUpload).freeze
  # Loads, with one query per model, the records referenced by a batch of
  # storage objects.
  #
  # objects - enumerable of items responding to #key with a "/"-separated path.
  #
  # Returns a Hash of model name => { record id => record }. Keys whose
  # segment count (ignoring "cache") is not 7 or 10, or whose model is not in
  # PRELOAD_MODEL_WHITELIST, are ignored.
  def preload_records_from_mixed_objects(objects)
    preload_map = Hash.new { |hash, key| hash[key] = [] }

    # each rather than map: the loop only fills preload_map.
    objects.each do |object|
      segments = object.key.split('/')
      segments.delete('cache')

      next unless [7, 10].include?(segments.size)

      model_name = segments.first.classify
      # String#to_i stops at the first non-digit, so the trailing non-numeric
      # segment in the joined string is ignored.
      record_id  = segments[2..-2].join.to_i

      next unless PRELOAD_MODEL_WHITELIST.include?(model_name)

      preload_map[model_name] << record_id
    end

    preload_map.each_with_object({}) do |(model_name, record_ids), model_map|
      model_map[model_name] = model_name.constantize.where(id: record_ids).index_by(&:id)
    end
  end
  260. end
  261. end