You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

173 lines
4.5 KiB

# frozen_string_literal: true
  2. class SpamCheck
  3. include Redisable
  4. include ActionView::Helpers::TextHelper
  5. NILSIMSA_COMPARE_THRESHOLD = 95
  6. NILSIMSA_MIN_SIZE = 10
  7. EXPIRE_SET_AFTER = 1.week.seconds
  8. def initialize(status)
  9. @account = status.account
  10. @status = status
  11. end
  12. def skip?
  13. disabled? || already_flagged? || trusted? || no_unsolicited_mentions? || solicited_reply?
  14. end
  15. def spam?
  16. if insufficient_data?
  17. false
  18. elsif nilsimsa?
  19. any_other_digest?('nilsimsa') { |_, other_digest| nilsimsa_compare_value(digest, other_digest) >= NILSIMSA_COMPARE_THRESHOLD }
  20. else
  21. any_other_digest?('md5') { |_, other_digest| other_digest == digest }
  22. end
  23. end
  24. def flag!
  25. auto_silence_account!
  26. auto_report_status!
  27. end
  28. def remember!
  29. # The scores in sorted sets don't actually have enough bits to hold an exact
  30. # value of our snowflake IDs, so we use it only for its ordering property. To
  31. # get the correct status ID back, we have to save it in the string value
  32. redis.zadd(redis_key, @status.id, digest_with_algorithm)
  33. redis.zremrangebyrank(redis_key, '0', '-10')
  34. redis.expire(redis_key, EXPIRE_SET_AFTER)
  35. end
  36. def reset!
  37. redis.del(redis_key)
  38. end
  39. def hashable_text
  40. return @hashable_text if defined?(@hashable_text)
  41. @hashable_text = @status.text
  42. @hashable_text = remove_mentions(@hashable_text)
  43. @hashable_text = strip_tags(@hashable_text) unless @status.local?
  44. @hashable_text = normalize_unicode(@status.spoiler_text + ' ' + @hashable_text)
  45. @hashable_text = remove_whitespace(@hashable_text)
  46. end
  47. def insufficient_data?
  48. hashable_text.blank?
  49. end
  50. def digest
  51. @digest ||= begin
  52. if nilsimsa?
  53. Nilsimsa.new(hashable_text).hexdigest
  54. else
  55. Digest::MD5.hexdigest(hashable_text)
  56. end
  57. end
  58. end
  59. def digest_with_algorithm
  60. if nilsimsa?
  61. ['nilsimsa', digest, @status.id].join(':')
  62. else
  63. ['md5', digest, @status.id].join(':')
  64. end
  65. end
  66. private
  67. def disabled?
  68. !Setting.spam_check_enabled
  69. end
  70. def remove_mentions(text)
  71. return text.gsub(Account::MENTION_RE, '') if @status.local?
  72. Nokogiri::HTML.fragment(text).tap do |html|
  73. mentions = @status.mentions.map { |mention| ActivityPub::TagManager.instance.url_for(mention.account) }
  74. html.traverse do |element|
  75. element.unlink if element.name == 'a' && mentions.include?(element['href'])
  76. end
  77. end.to_s
  78. end
  79. def normalize_unicode(text)
  80. text.unicode_normalize(:nfkc).downcase
  81. end
  82. def remove_whitespace(text)
  83. text.gsub(/\s+/, ' ').strip
  84. end
  85. def auto_silence_account!
  86. @account.silence!
  87. end
  88. def auto_report_status!
  89. status_ids = Status.where(visibility: %i(public unlisted)).where(id: matching_status_ids).pluck(:id) + [@status.id] if @status.distributable?
  90. ReportService.new.call(Account.representative, @account, status_ids: status_ids, comment: I18n.t('spam_check.spam_detected_and_silenced'))
  91. end
  92. def already_flagged?
  93. @account.silenced?
  94. end
  95. def trusted?
  96. @account.trust_level > Account::TRUST_LEVELS[:untrusted]
  97. end
  98. def no_unsolicited_mentions?
  99. @status.mentions.all? { |mention| mention.silent? || (!@account.local? && !mention.account.local?) || mention.account.following?(@account) }
  100. end
  101. def solicited_reply?
  102. !@status.thread.nil? && @status.thread.mentions.where(account: @account).exists?
  103. end
  104. def nilsimsa_compare_value(first, second)
  105. first = [first].pack('H*')
  106. second = [second].pack('H*')
  107. bits = 0
  108. 0.upto(31) do |i|
  109. bits += Nilsimsa::POPC[255 & (first[i].ord ^ second[i].ord)].ord
  110. end
  111. 128 - bits # -128 <= Nilsimsa Compare Value <= 128
  112. end
  113. def nilsimsa?
  114. hashable_text.size > NILSIMSA_MIN_SIZE
  115. end
  116. def other_digests
  117. redis.zrange(redis_key, 0, -1)
  118. end
  119. def any_other_digest?(filter_algorithm)
  120. other_digests.any? do |record|
  121. algorithm, other_digest, status_id = record.split(':')
  122. next unless algorithm == filter_algorithm
  123. yield algorithm, other_digest, status_id
  124. end
  125. end
  126. def matching_status_ids
  127. if nilsimsa?
  128. other_digests.select { |record| record.start_with?('nilsimsa') && nilsimsa_compare_value(digest, record.split(':')[1]) >= NILSIMSA_COMPARE_THRESHOLD }.map { |record| record.split(':')[2] }.compact
  129. else
  130. other_digests.select { |record| record.start_with?('md5') && record.split(':')[1] == digest }.map { |record| record.split(':')[2] }.compact
  131. end
  132. end
  133. def redis_key
  134. @redis_key ||= "spam_check:#{@account.id}"
  135. end
  136. end