You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

55 lines
1.1 KiB

  1. # frozen_string_literal: true
  # Guesses the language of a piece of text with CLD3 and reports it as a
  # two-letter code, falling back to the owning account's locale whenever
  # detection is unreliable or the detected code cannot be mapped.
  class LanguageDetector
    attr_reader :text, :account

    # @param text [String, nil] raw text to analyse
    # @param account [#user_locale, nil] optional owner; only `user_locale`
    #   is read from it, as the fallback language
    def initialize(text, account = nil)
      @text = text
      @account = account
      # NOTE(review): 1 and 2048 are presumably the minimum and maximum number
      # of bytes CLD3 inspects -- confirm against the cld3 gem documentation.
      @identifier = CLD3::NNetLanguageIdentifier.new(1, 2048)
    end

    # @return [Symbol, nil] the detected language code, else the account's
    #   locale, else nil (despite the name, this is a Symbol, not a String)
    def to_iso_s
      detected_language_code || default_locale
    end

    # @return [String] the text actually handed to the detector: URLs,
    #   mentions and hashtags removed, whitespace runs collapsed, ends trimmed
    def prepared_text
      simplified_text.strip
    end

    private

    # @return [Symbol, nil] nil when detection is unreliable or when the
    #   detected code has no usable two-letter form, so that `to_iso_s`
    #   falls through to the default locale instead of returning `:""`
    def detected_language_code
      return unless detected_language_reliable?

      iso6391(result.language)&.to_sym
    end

    # Reduces a BCP 47 tag to its two-letter language code.
    #
    # @param bcp47 [String, Symbol, nil] tag reported by the detector
    # @return [String, nil] nil when the tag is blank, unknown to ISO_639,
    #   or has an empty two-letter code
    def iso6391(bcp47)
      # to_s lets a Symbol or nil tag through without raising on #split
      iso639 = bcp47.to_s.split('-').first
      return if iso639.nil? || iso639.empty?

      # CLD3 returns grandfathered language code for Hebrew
      return 'he' if iso639 == 'iw'

      alpha2 = ISO_639.find(iso639)&.alpha2
      alpha2 unless alpha2.nil? || alpha2.empty?
    end

    # Memoised detector output for the prepared text.
    def result
      @result ||= @identifier.find_language(prepared_text)
    end

    def detected_language_reliable?
      result.reliable?
    end

    # Strips the parts of a post that say nothing about its language.
    # Works on a copy so the caller's string is never mutated; a nil text
    # is treated as an empty string.
    def simplified_text
      text.to_s.dup.tap do |new_text|
        new_text.gsub!(FetchLinkCardService::URL_PATTERN, '')
        new_text.gsub!(Account::MENTION_RE, '')
        new_text.gsub!(Tag::HASHTAG_RE, '')
        new_text.gsub!(/\s+/, ' ')
      end
    end

    # @return [Symbol, nil] the account's locale, or nil without an account
    #   or without a locale on it
    def default_locale
      account&.user_locale&.to_sym
    end
  end