Merge remote-tracking branch 'upstream/main'

This commit is contained in:
bgme 2022-01-20 23:53:03 +08:00
commit cb13b04b53
704 changed files with 23752 additions and 11017 deletions

View file

@ -13,6 +13,7 @@ require_relative 'mastodon/preview_cards_cli'
require_relative 'mastodon/cache_cli'
require_relative 'mastodon/upgrade_cli'
require_relative 'mastodon/email_domain_blocks_cli'
+require_relative 'mastodon/canonical_email_blocks_cli'
require_relative 'mastodon/ip_blocks_cli'
require_relative 'mastodon/maintenance_cli'
require_relative 'mastodon/version'
@ -62,6 +63,9 @@ module Mastodon
desc 'ip_blocks SUBCOMMAND ...ARGS', 'Manage IP blocks'
subcommand 'ip_blocks', Mastodon::IpBlocksCLI
+desc 'canonical_email_blocks SUBCOMMAND ...ARGS', 'Manage canonical e-mail blocks'
+subcommand 'canonical_email_blocks', Mastodon::CanonicalEmailBlocksCLI
desc 'maintenance SUBCOMMAND ...ARGS', 'Various maintenance utilities'
subcommand 'maintenance', Mastodon::MaintenanceCLI
@ -94,17 +98,22 @@ module Mastodon
exit(1) unless prompt.ask('Type in the domain of the server to confirm:', required: true) == Rails.configuration.x.local_domain
-prompt.warn('This operation WILL NOT be reversible. It can also take a long time.')
-prompt.warn('While the data won\'t be erased locally, the server will be in a BROKEN STATE afterwards.')
-prompt.warn('A running Sidekiq process is required. Do not shut it down until queues clear.')
+unless options[:dry_run]
+  prompt.warn('This operation WILL NOT be reversible. It can also take a long time.')
+  prompt.warn('While the data won\'t be erased locally, the server will be in a BROKEN STATE afterwards.')
+  prompt.warn('A running Sidekiq process is required. Do not shut it down until queues clear.')

-exit(1) if prompt.no?('Are you sure you want to proceed?')
+  exit(1) if prompt.no?('Are you sure you want to proceed?')
+end
inboxes = Account.inboxes
processed = 0
+dry_run = options[:dry_run] ? ' (DRY RUN)' : ''
+Setting.registrations_mode = 'none' unless options[:dry_run]
if inboxes.empty?
+Account.local.without_suspended.in_batches.update_all(suspended_at: Time.now.utc, suspension_origin: :local) unless options[:dry_run]
prompt.ok('It seems like your server has not federated with anything')
prompt.ok('You can shut it down and delete it any time')
return
@ -112,9 +121,7 @@ module Mastodon
prompt.warn('Do NOT interrupt this process...')
-Setting.registrations_mode = 'none'
-Account.local.without_suspended.find_each do |account|
+delete_account = ->(account) do
payload = ActiveModelSerializers::SerializableResource.new(
account,
serializer: ActivityPub::DeleteActorSerializer,
@ -128,12 +135,15 @@ module Mastodon
[json, account.id, inbox_url]
end
-account.suspend!
+account.suspend!(block_email: false)
end
processed += 1
end
+Account.local.without_suspended.find_each { |account| delete_account.call(account) }
+Account.local.suspended.joins(:deletion_request).find_each { |account| delete_account.call(account) }
prompt.ok("Queued #{inboxes.size * processed} items into Sidekiq for #{processed} accounts#{dry_run}")
prompt.ok('Wait until Sidekiq processes all items, then you can shut everything down and delete the data')
rescue TTY::Reader::InputInterrupt
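
For context, the new --dry-run flag gates every destructive step while leaving the counters untouched, so the closing summary still reflects what a real run would do. A minimal, self-contained sketch of that pattern (deliver_delete is a hypothetical stand-in for the ActivityPub delivery above, not Mastodon's API):

# Sketch only: side effects are gated by dry_run, bookkeeping is not.
def self_destruct(accounts, inboxes, dry_run: false)
  processed = 0
  accounts.each do |account|
    deliver_delete(account, inboxes) unless dry_run
    processed += 1
  end
  "Queued #{inboxes.size * processed} items for #{processed} accounts#{dry_run ? ' (DRY RUN)' : ''}"
end

def deliver_delete(account, inboxes)
  # stand-in: the real code pushes a signed Delete activity to every inbox
end

puts self_destruct(%w(alice bob), %w(https://a.example/inbox), dry_run: true)
# => Queued 2 items for 2 accounts (DRY RUN)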

View file

@ -287,7 +287,7 @@ module Mastodon
option :concurrency, type: :numeric, default: 5, aliases: [:c]
option :dry_run, type: :boolean
-desc 'cull', 'Remove remote accounts that no longer exist'
+desc 'cull [DOMAIN...]', 'Remove remote accounts that no longer exist'
long_desc <<-LONG_DESC
Query every single remote account in the database to determine
if it still exists on the origin server, and if it doesn't,
@ -296,19 +296,22 @@ module Mastodon
Accounts that have had confirmed activity within the last week
are excluded from the checks.
LONG_DESC
-def cull
+def cull(*domains)
skip_threshold = 7.days.ago
dry_run = options[:dry_run] ? ' (DRY RUN)' : ''
skip_domains = Concurrent::Set.new
-processed, culled = parallelize_with_progress(Account.remote.where(protocol: :activitypub).partitioned) do |account|
+query = Account.remote.where(protocol: :activitypub)
+query = query.where(domain: domains) unless domains.empty?
+processed, culled = parallelize_with_progress(query.partitioned) do |account|
next if account.updated_at >= skip_threshold || (account.last_webfingered_at.present? && account.last_webfingered_at >= skip_threshold) || skip_domains.include?(account.domain)
code = 0
begin
code = Request.new(:head, account.uri).perform(&:code)
-rescue HTTP::ConnectionError
+rescue HTTP::TimeoutError, HTTP::ConnectionError, OpenSSL::SSL::SSLError
skip_domains << account.domain
end
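
The cull workers run in parallel, which is why the shared skip list is a Concurrent::Set from concurrent-ruby rather than a plain Set. A small illustrative sketch of the difference (domain names are placeholders):

require 'concurrent'

skip_domains = Concurrent::Set.new # safe for concurrent << and include? across threads

threads = Array.new(5) do |i|
  Thread.new { skip_domains << "dead#{i}.example" } # a plain Set would race here
end
threads.each(&:join)

skip_domains.include?('dead0.example') # => true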

View file

@ -0,0 +1,64 @@
# frozen_string_literal: true
require 'concurrent'
require_relative '../../config/boot'
require_relative '../../config/environment'
require_relative 'cli_helper'
module Mastodon
class CanonicalEmailBlocksCLI < Thor
include CLIHelper
def self.exit_on_failure?
true
end
desc 'find EMAIL', 'Find a given e-mail address in the canonical e-mail blocks'
long_desc <<-LONG_DESC
When suspending a local user, a hash of a "canonical" version of their e-mail
address is stored to prevent them from signing up again.
This command can be used to find whether a known email address is blocked,
and if so, which account it was attached to.
LONG_DESC
def find(email)
accts = CanonicalEmailBlock.find_blocks(email).map(&:reference_account).map(&:acct).to_a
if accts.empty?
say("#{email} is not blocked", :yellow)
else
accts.each do |acct|
say(acct, :white)
end
end
end
desc 'remove EMAIL', 'Remove a canonical e-mail block'
long_desc <<-LONG_DESC
When suspending a local user, a hash of a "canonical" version of their e-mail
address is stored to prevent them from signing up again.
This command allows removing a canonical email block.
LONG_DESC
def remove(email)
blocks = CanonicalEmailBlock.find_blocks(email)
if blocks.empty?
say("#{email} is not blocked", :yellow)
else
blocks.destroy_all
say("Removed canonical email block for #{email}", :green)
end
end
private
def color(processed, failed)
if !processed.zero? && failed.zero?
:green
elsif failed.zero?
:yellow
else
:red
end
end
end
end
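
The blocks this CLI manages match on a hash of a canonicalized address rather than the raw string, which is why `find` takes a plain e-mail and still hits aliases. The canonicalization below is an assumption-laden illustration of the idea, not Mastodon's exact rules (those live in its EmailHelper):

require 'digest'

# Illustrative canonicalization: lowercase, drop plus-addressing and dots
# in the local part before hashing.
def canonical_email_hash(email)
  local, domain = email.downcase.split('@', 2)
  local = local.split('+').first.delete('.')
  Digest::SHA2.hexdigest("#{local}@#{domain}")
end

canonical_email_hash('John.Doe+spam@example.com') == canonical_email_hash('johndoe@example.com') # => true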

View file

@ -230,6 +230,7 @@ module Mastodon
processed, aggregate = parallelize_with_progress(scope) do |media_attachment|
next if media_attachment.remote_url.blank? || (!options[:force] && media_attachment.file_file_name.present?)
next if DomainBlock.reject_media?(media_attachment.account.domain)
+unless options[:dry_run]
media_attachment.reset_file!

View file

@ -17,10 +17,11 @@ module Mastodon
].freeze
option :concurrency, type: :numeric, default: 2, aliases: [:c], desc: 'Workload will be split between this number of threads'
+option :batch_size, type: :numeric, default: 1_000, aliases: [:b], desc: 'Number of records in each batch'
option :only, type: :array, enum: %w(accounts tags statuses), desc: 'Only process these indices'
-desc 'deploy', 'Create or upgrade ElasticSearch indices and populate them'
+desc 'deploy', 'Create or upgrade Elasticsearch indices and populate them'
long_desc <<~LONG_DESC
-  If ElasticSearch is empty, this command will create the necessary indices
+  If Elasticsearch is empty, this command will create the necessary indices
and then import data from the database into those indices.
This command will also upgrade indices if the underlying schema has been
@ -35,6 +36,11 @@ module Mastodon
exit(1)
end
+if options[:batch_size] < 1
+  say('Cannot run with this batch_size setting, must be at least 1', :red)
+  exit(1)
+end
indices = begin
if options[:only]
options[:only].map { |str| "#{str.camelize}Index".constantize }
@ -64,11 +70,7 @@ module Mastodon
progress.title = 'Estimating workload '
# Estimate the amount of data that has to be imported first
-indices.each do |index|
-  index.types.each do |type|
-    progress.total = (progress.total || 0) + type.adapter.default_scope.count
-  end
-end
+progress.total = indices.sum { |index| index.adapter.default_scope.count }
# Now import all the actual data. Mind that unlike chewy:sync, we don't
# fetch and compare all record IDs from the database and the index to
@ -77,70 +79,71 @@ module Mastodon
# is uneconomical. So we only ever add.
indices.each do |index|
progress.title = "Importing #{index} "
-batch_size = 1_000
+batch_size = options[:batch_size]
slice_size = (batch_size / options[:concurrency]).ceil

-index.types.each do |type|
-  type.adapter.default_scope.reorder(nil).find_in_batches(batch_size: batch_size) do |batch|
+index.adapter.default_scope.reorder(nil).find_in_batches(batch_size: batch_size) do |batch|
  futures = []

  batch.each_slice(slice_size) do |records|
    futures << Concurrent::Future.execute(executor: pool) do
      begin
        if !progress.total.nil? && progress.progress + records.size > progress.total
          # The number of items has changed between start and now,
          # since there is no good way to predict the final count from
          # here, just change the progress bar to an indeterminate one
          progress.total = nil
        end

        grouped_records = nil
        bulk_body       = nil
        index_count     = 0
        delete_count    = 0

        ActiveRecord::Base.connection_pool.with_connection do
-         grouped_records = type.adapter.send(:grouped_objects, records)
-         bulk_body       = Chewy::Type::Import::BulkBuilder.new(type, **grouped_records).bulk_body
+         grouped_records = records.to_a.group_by do |record|
+           index.adapter.send(:delete_from_index?, record) ? :delete : :to_index
+         end
+
+         bulk_body = Chewy::Index::Import::BulkBuilder.new(index, **grouped_records).bulk_body
        end

-       index_count  = grouped_records[:index].size    if grouped_records.key?(:index)
+       index_count  = grouped_records[:to_index].size if grouped_records.key?(:to_index)
        delete_count = grouped_records[:delete].size   if grouped_records.key?(:delete)

        # The following is an optimization for statuses specifically, since
        # we want to de-index statuses that cannot be searched by anybody,
        # but can't use Chewy's delete_if logic because it doesn't use
        # crutches and our searchable_by logic depends on them
-       if type == StatusesIndex::Status
+       if index == StatusesIndex
          bulk_body.map! do |entry|
-           if entry[:index] && entry.dig(:index, :data, 'searchable_by').blank?
+           if entry[:to_index] && entry.dig(:to_index, :data, 'searchable_by').blank?
              index_count  -= 1
              delete_count += 1

-             { delete: entry[:index].except(:data) }
+             { delete: entry[:to_index].except(:data) }
            else
              entry
            end
          end
        end

-       Chewy::Type::Import::BulkRequest.new(type).perform(bulk_body)
+       Chewy::Index::Import::BulkRequest.new(index).perform(bulk_body)

        progress.progress += records.size

        added.increment(index_count)
        removed.increment(delete_count)

        sleep 1
      rescue => e
        progress.log pastel.red("Error importing #{index}: #{e}")
      end
    end
  end

  futures.map(&:value)
-  end
end
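
The import loop above splits each database batch across a small thread pool and then joins on all futures before fetching the next batch. A self-contained sketch of that slice/future pattern using concurrent-ruby (pool size and data are placeholders):

require 'concurrent'

pool       = Concurrent::FixedThreadPool.new(2)
batch      = (1..1_000).to_a
slice_size = (batch.size / 2.0).ceil

futures = batch.each_slice(slice_size).map do |records|
  Concurrent::Future.execute(executor: pool) { records.sum } # stand-in for the bulk request
end

futures.map(&:value) # blocks until every slice is done, surfacing each result
pool.shutdown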

View file

@ -84,10 +84,7 @@ module Mastodon::Snowflake
-- Take the first two bytes (four hex characters)
substr(
-- Of the MD5 hash of the data we documented
-md5(table_name ||
-    '#{SecureRandom.hex(16)}' ||
-    time_part::text
-),
+md5(table_name || '#{SecureRandom.hex(16)}' || time_part::text),
1, 4
)
-- And turn it into a bigint
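
The reformatted SQL computes the same value as before: two pseudo-random bytes (four hex characters) derived from the table name, a secret baked in at function-definition time, and the timestamp. Roughly the following in Ruby, for illustration:

require 'digest'
require 'securerandom'

table_name = 'statuses'
secret     = SecureRandom.hex(16)           # fixed once, when the SQL function is defined
time_part  = (Time.now.to_f * 1000).to_i    # millisecond timestamp

# First four hex characters of the MD5, read as a 16-bit integer
Digest::MD5.hexdigest("#{table_name}#{secret}#{time_part}")[0, 4].to_i(16)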

View file

@ -6,6 +6,7 @@ require_relative 'cli_helper'
module Mastodon
class StatusesCLI < Thor
include CLIHelper
+include ActionView::Helpers::NumberHelper
def self.exit_on_failure?
@ -13,64 +14,213 @@ module Mastodon
end
option :days, type: :numeric, default: 90
-option :clean_followed, type: :boolean
-option :skip_media_remove, type: :boolean
+option :batch_size, type: :numeric, default: 1_000, aliases: [:b], desc: 'Number of records in each batch'
+option :continue, type: :boolean, default: false, desc: 'If a previous removal was interrupted, resume from where it left off'
+option :clean_followed, type: :boolean, default: false, desc: 'Include statuses from remote accounts that are followed by local accounts as candidates for removal'
+option :skip_status_remove, type: :boolean, default: false, desc: 'Skip status removal (run only the cleanup tasks)'
+option :skip_media_remove, type: :boolean, default: false, desc: 'Skip removal of orphaned media attachments'
+option :compress_database, type: :boolean, default: false, desc: 'Compress the database and update its statistics. This locks the table for a long time, so run it offline'
desc 'remove', 'Remove unreferenced statuses'
long_desc <<~LONG_DESC
Remove statuses that are not referenced by local user activity, such as
ones that came from relays, or belonging to users that were once followed
by someone locally but no longer are.
+It also removes orphaned records and performs additional cleanup tasks
+such as updating statistics and recovering disk space.
This is a computationally heavy procedure that creates extra database
indices before commencing, and removes them afterward.
LONG_DESC
def remove
+  if options[:batch_size] < 1
+    say('Cannot run with this batch_size setting, must be at least 1', :red)
+    exit(1)
+  end
+
+  remove_statuses
+  vacuum_and_analyze_statuses
+  remove_orphans_media_attachments
+  remove_orphans_conversations
+  vacuum_and_analyze_conversations
+end
+
+private
+
+def remove_statuses
+  return if options[:skip_status_remove]

say('Creating temporary database indices...')

-ActiveRecord::Base.connection.add_index(:accounts, :id, name: :index_accounts_local, where: 'domain is null', algorithm: :concurrently) unless ActiveRecord::Base.connection.index_name_exists?(:accounts, :index_accounts_local)
-ActiveRecord::Base.connection.add_index(:status_pins, :status_id, name: :index_status_pins_status_id, algorithm: :concurrently) unless ActiveRecord::Base.connection.index_name_exists?(:status_pins, :index_status_pins_status_id)
-ActiveRecord::Base.connection.add_index(:media_attachments, :remote_url, name: :index_media_attachments_remote_url, where: 'remote_url is not null', algorithm: :concurrently) unless ActiveRecord::Base.connection.index_name_exists?(:media_attachments, :index_media_attachments_remote_url)
+ActiveRecord::Base.connection.add_index(:media_attachments, :remote_url, name: :index_media_attachments_remote_url, where: 'remote_url is not null', algorithm: :concurrently, if_not_exists: true)

max_id = Mastodon::Snowflake.id_at(options[:days].days.ago)
start_at = Time.now.to_f

-say('Beginning removal... This might take a while...')
+unless options[:continue] && ActiveRecord::Base.connection.table_exists?('statuses_to_be_deleted')
+  ActiveRecord::Base.connection.add_index(:accounts, :id, name: :index_accounts_local, where: 'domain is null', algorithm: :concurrently, if_not_exists: true)
+  ActiveRecord::Base.connection.add_index(:status_pins, :status_id, name: :index_status_pins_status_id, algorithm: :concurrently, if_not_exists: true)

-scope = Status.remote.where('id < ?', max_id)
-# Skip reblogs of local statuses
-scope = scope.where('reblog_of_id NOT IN (SELECT statuses1.id FROM statuses AS statuses1 WHERE statuses1.id = statuses.reblog_of_id AND (statuses1.uri IS NULL OR statuses1.local))')
-# Skip statuses that are pinned on profiles
-scope = scope.where('id NOT IN (SELECT status_pins.status_id FROM status_pins WHERE statuses.id = status_id)')
-# Skip statuses that mention local accounts
-scope = scope.where('id NOT IN (SELECT mentions.status_id FROM mentions WHERE statuses.id = mentions.status_id AND mentions.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))')
-# Skip statuses which have replies
-scope = scope.where('id NOT IN (SELECT statuses1.in_reply_to_id FROM statuses AS statuses1 WHERE statuses.id = statuses1.in_reply_to_id)')
-# Skip statuses reblogged by local accounts or with recent boosts
-scope = scope.where('id NOT IN (SELECT statuses1.reblog_of_id FROM statuses AS statuses1 WHERE statuses.id = statuses1.reblog_of_id AND (statuses1.uri IS NULL OR statuses1.local OR statuses1.id >= ?))', max_id)
-# Skip statuses favourited by local users
-scope = scope.where('id NOT IN (SELECT favourites.status_id FROM favourites WHERE statuses.id = favourites.status_id AND favourites.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))')
-# Skip statuses bookmarked by local users
-scope = scope.where('id NOT IN (SELECT bookmarks.status_id FROM bookmarks WHERE statuses.id = bookmarks.status_id AND bookmarks.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))')

+  say('Extract the deletion target from statuses... This might take a while...')
+  ActiveRecord::Base.connection.create_table('statuses_to_be_deleted', force: true)

-unless options[:clean_followed]
-  # Skip accounts followed by local accounts
-  scope = scope.where('account_id NOT IN (SELECT follows.target_account_id FROM follows WHERE statuses.account_id = follows.target_account_id)')
-end
+  clean_followed_sql = 'AND NOT EXISTS (SELECT 1 FROM follows WHERE statuses.account_id = follows.target_account_id)' unless options[:clean_followed]

+  ActiveRecord::Base.connection.exec_insert(<<-SQL.squish, 'SQL', [[nil, max_id]])
+    INSERT INTO statuses_to_be_deleted (id)
+    SELECT statuses.id FROM statuses WHERE deleted_at IS NULL AND NOT local AND uri IS NOT NULL AND (id < $1)
+    AND NOT EXISTS (SELECT 1 FROM statuses AS statuses1 WHERE statuses.id = statuses1.in_reply_to_id)
+    AND NOT EXISTS (SELECT 1 FROM statuses AS statuses1 WHERE statuses1.id = statuses.reblog_of_id AND (statuses1.uri IS NULL OR statuses1.local))
+    AND NOT EXISTS (SELECT 1 FROM statuses AS statuses1 WHERE statuses.id = statuses1.reblog_of_id AND (statuses1.uri IS NULL OR statuses1.local OR statuses1.id >= $1))
+    AND NOT EXISTS (SELECT 1 FROM status_pins WHERE statuses.id = status_id)
+    AND NOT EXISTS (SELECT 1 FROM mentions WHERE statuses.id = mentions.status_id AND mentions.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))
+    AND NOT EXISTS (SELECT 1 FROM favourites WHERE statuses.id = favourites.status_id AND favourites.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))
+    AND NOT EXISTS (SELECT 1 FROM bookmarks WHERE statuses.id = bookmarks.status_id AND bookmarks.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))
+    #{clean_followed_sql}
+  SQL

+  say('Removing temporary database indices to restore write performance...')
+  ActiveRecord::Base.connection.remove_index(:accounts, name: :index_accounts_local, if_exists: true)
+  ActiveRecord::Base.connection.remove_index(:status_pins, name: :index_status_pins_status_id, if_exists: true)
+end

-scope.in_batches.delete_all
+say('Beginning statuses removal... This might take a while...')

-unless options[:skip_media_remove]
-  say('Beginning removal of now-orphaned media attachments to free up disk space...')
-  Scheduler::MediaCleanupScheduler.new.perform
-end
+klass = Class.new(ApplicationRecord) do |c|
+  c.table_name = 'statuses_to_be_deleted'
+end

-say("Done after #{Time.now.to_f - start_at}s", :green)
+Object.const_set('StatusToBeDeleted', klass)

+scope = StatusToBeDeleted
+processed = 0
+removed = 0
+progress = create_progress_bar(scope.count.fdiv(options[:batch_size]).ceil)

+scope.reorder(nil).in_batches(of: options[:batch_size]) do |relation|
+  ids = relation.pluck(:id)
+  processed += ids.count
+  removed += Status.unscoped.where(id: ids).delete_all
+  progress.increment
+end

+progress.stop

+ActiveRecord::Base.connection.drop_table('statuses_to_be_deleted')
+say("Done after #{Time.now.to_f - start_at}s, removed #{removed} out of #{processed} statuses.", :green)
ensure
say('Removing temporary database indices to restore write performance...')
-ActiveRecord::Base.connection.remove_index(:accounts, name: :index_accounts_local) if ActiveRecord::Base.connection.index_name_exists?(:accounts, :index_accounts_local)
-ActiveRecord::Base.connection.remove_index(:status_pins, name: :index_status_pins_status_id) if ActiveRecord::Base.connection.index_name_exists?(:status_pins, :index_status_pins_status_id)
-ActiveRecord::Base.connection.remove_index(:media_attachments, name: :index_media_attachments_remote_url) if ActiveRecord::Base.connection.index_name_exists?(:media_attachments, :index_media_attachments_remote_url)
+ActiveRecord::Base.connection.remove_index(:accounts, name: :index_accounts_local, if_exists: true)
+ActiveRecord::Base.connection.remove_index(:status_pins, name: :index_status_pins_status_id, if_exists: true)
+ActiveRecord::Base.connection.remove_index(:media_attachments, name: :index_media_attachments_remote_url, if_exists: true)
end
def remove_orphans_media_attachments
return if options[:skip_media_remove]
start_at = Time.now.to_f
say('Beginning removal of now-orphaned media attachments to free up disk space...')
scope = MediaAttachment.reorder(nil).unattached.where('created_at < ?', options[:days].pred.days.ago)
processed = 0
removed = 0
progress = create_progress_bar(scope.count)
scope.find_each do |media_attachment|
media_attachment.destroy!
removed += 1
rescue => e
progress.log pastel.red("Error processing #{media_attachment.id}: #{e}")
ensure
progress.increment
processed += 1
end
progress.stop
say("Done after #{Time.now.to_f - start_at}s, removed #{removed} out of #{processed} media_attachments.", :green)
end
def remove_orphans_conversations
start_at = Time.now.to_f
unless options[:continue] && ActiveRecord::Base.connection.table_exists?('conversations_to_be_deleted')
say('Creating temporary database indices...')
ActiveRecord::Base.connection.add_index(:statuses, :conversation_id, name: :index_statuses_conversation_id, algorithm: :concurrently, if_not_exists: true)
say('Extract the deletion target from conversations... This might take a while...')
ActiveRecord::Base.connection.create_table('conversations_to_be_deleted', force: true)
ActiveRecord::Base.connection.exec_insert(<<-SQL.squish, 'SQL')
INSERT INTO conversations_to_be_deleted (id)
SELECT id FROM conversations WHERE NOT EXISTS (SELECT 1 FROM statuses WHERE statuses.conversation_id = conversations.id)
SQL
say('Removing temporary database indices to restore write performance...')
ActiveRecord::Base.connection.remove_index(:statuses, name: :index_statuses_conversation_id, if_exists: true)
end
say('Beginning orphans removal... This might take a while...')
klass = Class.new(ApplicationRecord) do |c|
c.table_name = 'conversations_to_be_deleted'
end
Object.const_set('ConversationsToBeDeleted', klass)
scope = ConversationsToBeDeleted
processed = 0
removed = 0
progress = create_progress_bar(scope.count.fdiv(options[:batch_size]).ceil)
scope.in_batches(of: options[:batch_size]) do |relation|
ids = relation.pluck(:id)
processed += ids.count
removed += Conversation.unscoped.where(id: ids).delete_all
progress.increment
end
progress.stop
ActiveRecord::Base.connection.drop_table('conversations_to_be_deleted')
say("Done after #{Time.now.to_f - start_at}s, removed #{removed} out of #{processed} conversations.", :green)
ensure
say('Removing temporary database indices to restore write performance...')
ActiveRecord::Base.connection.remove_index(:statuses, name: :index_statuses_conversation_id, if_exists: true)
end
def vacuum_and_analyze_statuses
if options[:compress_database]
say('Running VACUUM FULL ANALYZE on statuses...')
ActiveRecord::Base.connection.execute('VACUUM FULL ANALYZE statuses')
say('Running REINDEX on statuses...')
ActiveRecord::Base.connection.execute('REINDEX TABLE statuses')
else
say('Running ANALYZE on statuses...')
ActiveRecord::Base.connection.execute('ANALYZE statuses')
end
end
def vacuum_and_analyze_conversations
if options[:compress_database]
say('Running VACUUM FULL ANALYZE on conversations...')
ActiveRecord::Base.connection.execute('VACUUM FULL ANALYZE conversations')
say('Running REINDEX on conversations...')
ActiveRecord::Base.connection.execute('REINDEX TABLE conversations')
else
say('Running ANALYZE on conversations...')
ActiveRecord::Base.connection.execute('ANALYZE conversations')
end
end
end
end
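
The new flow snapshots candidate IDs into statuses_to_be_deleted first, which is what makes --continue possible: deletion can stop and resume without re-running the expensive candidate SELECT, and each batch commits independently. A condensed sketch of that snapshot-then-delete pattern under the same assumptions as the code above (Rails models available, data access simplified):

conn = ActiveRecord::Base.connection

unless continuing && conn.table_exists?('statuses_to_be_deleted') # `continuing` mirrors options[:continue]
  conn.create_table('statuses_to_be_deleted', force: true)
  # ... populate it with the candidate ids via one big INSERT ... SELECT ...
end

klass = Class.new(ApplicationRecord) { |c| c.table_name = 'statuses_to_be_deleted' }
klass.in_batches(of: 1_000) do |relation|
  Status.unscoped.where(id: relation.pluck(:id)).delete_all # each batch stands alone
end

conn.drop_table('statuses_to_be_deleted') # only reached once every batch succeeded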

View file

@ -13,7 +13,7 @@ module Mastodon
end
def patch
-1
+3
end
def flags

View file

@ -6,6 +6,35 @@ module Paperclip
instance_read(:meta)
end
# monkey-patch to avoid unlinking the source file too early
# see https://github.com/kreeti/kt-paperclip/issues/64
def post_process_style(name, style) #:nodoc:
raise "Style #{name} has no processors defined." if style.processors.blank?
intermediate_files = []
original = @queued_for_write[:original]
# if we're processing the original, close + unlink the source tempfile
intermediate_files << original if name == :original
@queued_for_write[name] = style.processors.
inject(original) do |file, processor|
file = Paperclip.processor(processor).make(file, style.processor_options, self)
intermediate_files << file unless file == original
file
end
unadapted_file = @queued_for_write[name]
@queued_for_write[name] = Paperclip.io_adapters.
for(@queued_for_write[name], @options[:adapter_options])
unadapted_file.close if unadapted_file.respond_to?(:close)
@queued_for_write[name]
rescue Paperclip::Errors::NotIdentifiedByImageMagickError => e
log("An error was received while processing: #{e.inspect}")
(@errors[:processing] ||= []) << e.message if @options[:whiny]
ensure
unlink_files(intermediate_files)
end
# We overwrite this method to support delayed processing in
# Sidekiq. Since we process the original file to reduce disk
# usage, and we still want to generate thumbnails straight

View file

@ -1,35 +0,0 @@
# frozen_string_literal: true
module Paperclip
module MediaTypeSpoofDetectorExtensions
def mapping_override_mismatch?
!Array(mapped_content_type).include?(calculated_content_type) && !Array(mapped_content_type).include?(type_from_mime_magic)
end
def calculated_media_type_from_mime_magic
@calculated_media_type_from_mime_magic ||= type_from_mime_magic.split('/').first
end
def calculated_type_mismatch?
!media_types_from_name.include?(calculated_media_type) && !media_types_from_name.include?(calculated_media_type_from_mime_magic)
end
def type_from_mime_magic
@type_from_mime_magic ||= begin
begin
File.open(@file.path) do |file|
MimeMagic.by_magic(file)&.type || ''
end
rescue Errno::ENOENT
''
end
end
end
def type_from_file_command
@type_from_file_command ||= FileCommandContentTypeDetector.new(@file.path).detect
end
end
end
Paperclip::MediaTypeSpoofDetector.prepend(Paperclip::MediaTypeSpoofDetectorExtensions)

View file

@ -17,9 +17,9 @@ module Paperclip
def cache_current_values
@original_filename = filename_from_content_disposition.presence || filename_from_path.presence || 'data'
-@size = @target.response.content_length
@tempfile = copy_to_tempfile(@target)
@content_type = ContentTypeDetector.new(@tempfile.path).detect
+@size = File.size(@tempfile)
end
def copy_to_tempfile(source)

View file

@ -1,37 +0,0 @@
# frozen_string_literal: true
# Monkey-patch various Paperclip methods for Ruby 3.0 compatibility
module Paperclip
module Schema
module StatementsExtensions
def add_attachment(table_name, *attachment_names)
raise ArgumentError, 'Please specify attachment name in your add_attachment call in your migration.' if attachment_names.empty?
options = attachment_names.extract_options!
attachment_names.each do |attachment_name|
COLUMNS.each_pair do |column_name, column_type|
column_options = options.merge(options[column_name.to_sym] || {})
add_column(table_name, "#{attachment_name}_#{column_name}", column_type, **column_options)
end
end
end
end
module TableDefinitionExtensions
def attachment(*attachment_names)
options = attachment_names.extract_options!
attachment_names.each do |attachment_name|
COLUMNS.each_pair do |column_name, column_type|
column_options = options.merge(options[column_name.to_sym] || {})
column("#{attachment_name}_#{column_name}", column_type, **column_options)
end
end
end
end
end
end
Paperclip::Schema::Statements.prepend(Paperclip::Schema::StatementsExtensions)
Paperclip::Schema::TableDefinition.prepend(Paperclip::Schema::TableDefinitionExtensions)

View file

@ -0,0 +1,21 @@
# frozen_string_literal: true
# Some S3-compatible providers might not actually be compatible with some APIs
# used by kt-paperclip, see https://github.com/mastodon/mastodon/issues/16822
if ENV['S3_ENABLED'] == 'true' && ENV['S3_FORCE_SINGLE_REQUEST'] == 'true'
module Paperclip
module Storage
module S3Extensions
def copy_to_local_file(style, local_dest_path)
log("copying #{path(style)} to local file #{local_dest_path}")
s3_object(style).download_file(local_dest_path, { mode: 'single_request' })
rescue Aws::Errors::ServiceError => e
warn("#{e} - cannot copy #{path(style)} to local file #{local_dest_path}")
false
end
end
end
end
Paperclip::Storage::S3.prepend(Paperclip::Storage::S3Extensions)
end
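
For reference, mode: 'single_request' is a standard option of aws-sdk-s3's Aws::S3::Object#download_file, which otherwise defaults to chunked/ranged GETs that some S3-compatible backends mishandle. A standalone sketch of the same call outside Paperclip (bucket, key, and path are placeholders):

require 'aws-sdk-s3'

obj = Aws::S3::Resource.new.bucket('media-bucket').object('some/key')

# 'single_request' disables the default multipart/ranged download strategy
obj.download_file('/tmp/some-file', mode: 'single_request')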

View file

@ -2,16 +2,6 @@
module Paperclip
module UrlGeneratorExtensions
-# Monkey-patch Paperclip to use Addressable::URI's normalization instead
-# of the long-deprecated URI.escape
-def escape_url(url)
-  if url.respond_to?(:escape)
-    url.escape
-  else
-    Addressable::URI.parse(url).normalize.to_str.gsub(escape_regex) { |m| "%#{m.ord.to_s(16).upcase}" }
-  end
-end
def for_as_default(style_name)
attachment_options[:interpolator].interpolate(default_url, @attachment, style_name)
end

View file

@ -1,58 +0,0 @@
# frozen_string_literal: true
# Monkey-patch various Paperclip validators for Ruby 3.0 compatibility
module Paperclip
module Validators
module AttachmentSizeValidatorExtensions
def validate_each(record, attr_name, _value)
base_attr_name = attr_name
attr_name = "#{attr_name}_file_size".to_sym
value = record.send(:read_attribute_for_validation, attr_name)
if value.present?
options.slice(*Paperclip::Validators::AttachmentSizeValidator::AVAILABLE_CHECKS).each do |option, option_value|
option_value = option_value.call(record) if option_value.is_a?(Proc)
option_value = extract_option_value(option, option_value)
next if value.send(Paperclip::Validators::AttachmentSizeValidator::CHECKS[option], option_value)
error_message_key = options[:in] ? :in_between : option
[attr_name, base_attr_name].each do |error_attr_name|
record.errors.add(error_attr_name, error_message_key, **filtered_options(value).merge(
min: min_value_in_human_size(record),
max: max_value_in_human_size(record),
count: human_size(option_value)
))
end
end
end
end
end
module AttachmentContentTypeValidatorExtensions
def mark_invalid(record, attribute, types)
record.errors.add attribute, :invalid, **options.merge({ types: types.join(', ') })
end
end
module AttachmentPresenceValidatorExtensions
def validate_each(record, attribute, _value)
if record.send("#{attribute}_file_name").blank?
record.errors.add(attribute, :blank, **options)
end
end
end
module AttachmentFileNameValidatorExtensions
def mark_invalid(record, attribute, patterns)
record.errors.add attribute, :invalid, options.merge({ names: patterns.join(', ') })
end
end
end
end
Paperclip::Validators::AttachmentSizeValidator.prepend(Paperclip::Validators::AttachmentSizeValidatorExtensions)
Paperclip::Validators::AttachmentContentTypeValidator.prepend(Paperclip::Validators::AttachmentContentTypeValidatorExtensions)
Paperclip::Validators::AttachmentPresenceValidator.prepend(Paperclip::Validators::AttachmentPresenceValidatorExtensions)
Paperclip::Validators::AttachmentFileNameValidator.prepend(Paperclip::Validators::AttachmentFileNameValidatorExtensions)

View file

@ -0,0 +1,24 @@
# frozen_string_literal: true
class SidekiqErrorHandler
BACKTRACE_LIMIT = 3
def call(*)
yield
rescue Mastodon::HostValidationError
# Do not retry
rescue => e
limit_backtrace_and_raise(e)
ensure
socket = Thread.current[:statsd_socket]
socket&.close
Thread.current[:statsd_socket] = nil
end
private
def limit_backtrace_and_raise(exception)
exception.set_backtrace(exception.backtrace.first(BACKTRACE_LIMIT))
raise exception
end
end
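
A handler like this only takes effect once it is added to Sidekiq's server middleware chain; Mastodon wires it up in its Sidekiq initializer, roughly as below (standard Sidekiq API; the file path is an assumption):

# config/initializers/sidekiq.rb (sketch)
Sidekiq.configure_server do |config|
  config.server_middleware do |chain|
    chain.add SidekiqErrorHandler # wraps every job: trims backtraces, swallows HostValidationError
  end
end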

View file

@ -333,8 +333,12 @@ namespace :mastodon do
prompt.say 'This configuration will be written to .env.production'
if prompt.yes?('Save configuration?')
+incompatible_syntax = false
env_contents = env.each_pair.map do |key, value|
if value.is_a?(String) && value =~ /[\s\#\\"]/
+incompatible_syntax = true
if value =~ /[']/
value = value.to_s.gsub(/[\\"\$]/) { |x| "\\#{x}" }
"#{key}=\"#{value}\""
@ -346,12 +350,19 @@ namespace :mastodon do
end
end.join("\n")
-File.write(Rails.root.join('.env.production'), "# Generated with mastodon:setup on #{Time.now.utc}\n\n" + env_contents + "\n")
+generated_header = "# Generated with mastodon:setup on #{Time.now.utc}\n\n".dup

+if incompatible_syntax
+  generated_header << "# Some variables in this file will be interpreted differently whether you are\n"
+  generated_header << "# using docker-compose or not.\n\n"
+end

+File.write(Rails.root.join('.env.production'), "#{generated_header}#{env_contents}\n")
if using_docker
prompt.ok 'Below is your configuration, save it to an .env.production file outside Docker:'
prompt.say "\n"
-prompt.say File.read(Rails.root.join('.env.production'))
+prompt.say "#{generated_header}#{env.each_pair.map { |key, value| "#{key}=#{value}" }.join("\n")}"
prompt.say "\n"
prompt.ok 'It is also saved within this container so you can proceed with this wizard.'
end
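
To make the quoting rules above concrete: values containing whitespace, #, backslashes, or quotes set incompatible_syntax because dotenv and docker-compose parse such lines differently, and the double-quote branch escapes them like this (an illustrative value):

value = 'it\'s a #secret "pass\\word"'

escaped = value.gsub(/[\\"\$]/) { |x| "\\#{x}" }
line    = "KEY=\"#{escaped}\""
# => KEY="it's a #secret \"pass\\word\""  (read differently by dotenv vs docker-compose)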
@ -430,7 +441,7 @@ namespace :mastodon do
namespace :webpush do
desc 'Generate VAPID key'
-task generate_vapid_key: :environment do
+task :generate_vapid_key do
vapid_key = Webpush.generate_key
puts "VAPID_PRIVATE_KEY=#{vapid_key.private_key}"
puts "VAPID_PUBLIC_KEY=#{vapid_key.public_key}"

View file

@ -96,7 +96,7 @@ namespace :repo do
end.uniq.compact
missing_available_locales = locales_in_files - I18n.available_locales
-missing_locale_names = I18n.available_locales.reject { |locale| SettingsHelper::HUMAN_LOCALES.key?(locale) }
+missing_locale_names = I18n.available_locales.reject { |locale| LanguagesHelper::HUMAN_LOCALES.key?(locale) }
critical = false