You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

124 lines
4.0 KiB

  1. # frozen_string_literal: true
  2. require 'rails_helper'
  3. describe LanguageDetector do
  4. describe 'prepare_text' do
  5. it 'returns unmodified string without special cases' do
  6. string = 'just a regular string'
  7. result = described_class.instance.send(:prepare_text, string)
  8. expect(result).to eq string
  9. end
  10. it 'collapses spacing in strings' do
  11. string = 'The formatting in this is very odd'
  12. result = described_class.instance.send(:prepare_text, string)
  13. expect(result).to eq 'The formatting in this is very odd'
  14. end
  15. it 'strips usernames from strings before detection' do
  16. string = '@username Yeah, very surreal...! also @friend'
  17. result = described_class.instance.send(:prepare_text, string)
  18. expect(result).to eq 'Yeah, very surreal...! also'
  19. end
  20. it 'strips URLs from strings before detection' do
  21. string = 'Our website is https://example.com and also http://localhost.dev'
  22. result = described_class.instance.send(:prepare_text, string)
  23. expect(result).to eq 'Our website is and also'
  24. end
  25. it 'strips #hashtags from strings before detection' do
  26. string = 'Hey look at all the #animals and #fish'
  27. result = described_class.instance.send(:prepare_text, string)
  28. expect(result).to eq 'Hey look at all the and'
  29. end
  30. end
  31. describe 'detect' do
  32. let(:account_without_user_locale) { Fabricate(:user, locale: nil).account }
  33. it 'detects english language for basic strings' do
  34. strings = [
  35. "Hello and welcome to mastodon how are you today?",
  36. "I'd rather not!",
  37. "a lot of people just want to feel righteous all the time and that's all that matters",
  38. ]
  39. strings.each do |string|
  40. result = described_class.instance.detect(string, account_without_user_locale)
  41. expect(result).to eq(:en), string
  42. end
  43. end
  44. it 'detects spanish language' do
  45. string = 'Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon'
  46. result = described_class.instance.detect(string, account_without_user_locale)
  47. expect(result).to eq :es
  48. end
  49. describe 'when language can\'t be detected' do
  50. it 'uses nil when sent an empty document' do
  51. result = described_class.instance.detect('', account_without_user_locale)
  52. expect(result).to eq nil
  53. end
  54. describe 'because of a URL' do
  55. it 'uses nil when sent just a URL' do
  56. string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4'
  57. cld_result = CLD3::NNetLanguageIdentifier.new(0, 2048).find_language(string)
  58. expect(cld_result).not_to eq :en
  59. result = described_class.instance.detect(string, account_without_user_locale)
  60. expect(result).to eq nil
  61. end
  62. end
  63. describe 'with an account' do
  64. it 'uses the account locale when present' do
  65. account = double(user_locale: 'fr')
  66. result = described_class.instance.detect('', account)
  67. expect(result).to eq nil
  68. end
  69. it 'uses nil when account is present but has no locale' do
  70. result = described_class.instance.detect('', account_without_user_locale)
  71. expect(result).to eq nil
  72. end
  73. end
  74. describe 'with an `en` default locale' do
  75. it 'uses nil for undetectable string' do
  76. result = described_class.instance.detect('', account_without_user_locale)
  77. expect(result).to eq nil
  78. end
  79. end
  80. describe 'with a non-`en` default locale' do
  81. around(:each) do |example|
  82. before = I18n.default_locale
  83. I18n.default_locale = :ja
  84. example.run
  85. I18n.default_locale = before
  86. end
  87. it 'uses nil for undetectable string' do
  88. string = ''
  89. result = described_class.instance.detect(string, account_without_user_locale)
  90. expect(result).to eq nil
  91. end
  92. end
  93. end
  94. end
  95. end