You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

124 lines
3.6 KiB

  # frozen_string_literal: true

  require 'rails_helper'

  # Specs for LanguageDetector.
  #
  # Two public methods are exercised here:
  # * `prepared_text` - the input text after clean-up (whitespace collapsed,
  #   @mentions, URLs and #hashtags removed), as pinned by the examples below.
  # * `to_iso_s`      - the detected language as a Symbol (e.g. :en, :es), an
  #   account-supplied locale, or nil when nothing can be determined.
  describe LanguageDetector do
    describe 'prepared_text' do
      it 'returns unmodified string without special cases' do
        string = 'just a regular string'

        result = described_class.new(string).prepared_text

        expect(result).to eq string
      end

      it 'collapses spacing in strings' do
        # The input must contain irregular runs of whitespace; otherwise the
        # example passes even if no collapsing happens at all.
        string = 'The formatting   in this is very        odd'

        result = described_class.new(string).prepared_text

        expect(result).to eq 'The formatting in this is very odd'
      end

      it 'strips usernames from strings before detection' do
        string = '@username Yeah, very surreal...! also @friend'

        result = described_class.new(string).prepared_text

        expect(result).to eq 'Yeah, very surreal...! also'
      end

      it 'strips URLs from strings before detection' do
        string = 'Our website is https://example.com and also http://localhost.dev'

        result = described_class.new(string).prepared_text

        expect(result).to eq 'Our website is and also'
      end

      it 'strips #hashtags from strings before detection' do
        string = 'Hey look at all the #animals and #fish'

        result = described_class.new(string).prepared_text

        expect(result).to eq 'Hey look at all the and'
      end
    end

    describe 'to_iso_s' do
      it 'detects english language for basic strings' do
        strings = [
          "Hello and welcome to mastodon how are you today?",
          "I'd rather not!",
          "a lot of people just want to feel righteous all the time and that's all that matters",
        ]

        strings.each do |string|
          result = described_class.new(string).to_iso_s

          # The sample itself is the failure message, so a failing run shows
          # which string was misdetected.
          expect(result).to eq(:en), string
        end
      end

      it 'detects spanish language' do
        string = 'Obtener un Hola y bienvenidos a Mastodon'

        result = described_class.new(string).to_iso_s

        expect(result).to eq :es
      end

      describe "when language can't be detected" do
        it 'uses nil when sent an empty document' do
          result = described_class.new('').to_iso_s

          expect(result).to be_nil
        end

        describe 'because of a URL' do
          it 'uses nil when sent just a URL' do
            string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4'

            # NOTE(review): this compares the raw return value of
            # `find_language` with a Symbol. If that call returns a result
            # object rather than a bare language code, the expectation is
            # always true and checks nothing - confirm against the cld3 gem
            # and compare the language attribute instead if so.
            cld_result = CLD3::NNetLanguageIdentifier.new(0, 2048).find_language(string)
            expect(cld_result).not_to eq :en

            result = described_class.new(string).to_iso_s

            expect(result).to be_nil
          end
        end

        describe 'with an account' do
          it 'uses the account locale when present' do
            account = double(user_locale: 'fr')

            result = described_class.new('', account).to_iso_s

            expect(result).to eq :fr
          end

          it 'uses nil when account is present but has no locale' do
            account = double(user_locale: nil)

            result = described_class.new('', account).to_iso_s

            expect(result).to be_nil
          end
        end

        describe 'with an `en` default locale' do
          it 'uses nil for undetectable string' do
            string = ''

            result = described_class.new(string).to_iso_s

            expect(result).to be_nil
          end
        end

        describe 'with a non-`en` default locale' do
          # Temporarily switch the global default locale for each example.
          # The restore sits in `ensure` so the global setting cannot leak
          # into other examples if anything raises while the example runs.
          around do |example|
            previous_locale = I18n.default_locale
            begin
              I18n.default_locale = :ja
              example.run
            ensure
              I18n.default_locale = previous_locale
            end
          end

          it 'uses nil for undetectable string' do
            string = ''

            result = described_class.new(string).to_iso_s

            expect(result).to be_nil
          end
        end
      end
    end
  end