Browse Source

Language improvements, replace whatlanguage with CLD (#2753)

* add failing en specs

* add cld2 gem

* Replace WhatLanguage with CLD
closed-social-glitch-2
Matt Jankowski 7 years ago
committed by Eugen Rochko
parent
commit
8c5ad23b24
4 changed files with 33 additions and 13 deletions
  1. +1
    -1
      Gemfile
  2. +4
    -2
      Gemfile.lock
  3. +13
    -1
      app/lib/language_detector.rb
  4. +15
    -9
      spec/lib/language_detector_spec.rb

+ 1
- 1
Gemfile View File

@ -20,6 +20,7 @@ gem 'paperclip', '~> 5.1'
gem 'paperclip-av-transcoder' gem 'paperclip-av-transcoder'
gem 'addressable' gem 'addressable'
gem 'cld2', require: 'cld'
gem 'devise' gem 'devise'
gem 'devise-two-factor' gem 'devise-two-factor'
gem 'doorkeeper' gem 'doorkeeper'
@ -56,7 +57,6 @@ gem 'statsd-instrument'
gem 'twitter-text' gem 'twitter-text'
gem 'tzinfo-data' gem 'tzinfo-data'
gem 'webpacker', '~>1.2' gem 'webpacker', '~>1.2'
gem 'whatlanguage'
# For some reason the view specs start failing without this # For some reason the view specs start failing without this
gem 'react-rails' gem 'react-rails'

+ 4
- 2
Gemfile.lock View File

@ -102,6 +102,8 @@ GEM
rack-test (>= 0.5.4) rack-test (>= 0.5.4)
xpath (~> 2.0) xpath (~> 2.0)
chunky_png (1.3.8) chunky_png (1.3.8)
cld2 (1.0.3)
ffi (~> 1.9.3)
climate_control (0.1.0) climate_control (0.1.0)
cocaine (0.5.8) cocaine (0.5.8)
climate_control (>= 0.0.3, < 1.0) climate_control (>= 0.0.3, < 1.0)
@ -153,6 +155,7 @@ GEM
faker (1.7.3) faker (1.7.3)
i18n (~> 0.5) i18n (~> 0.5)
fast_blank (1.0.0) fast_blank (1.0.0)
ffi (1.9.18)
fuubar (2.2.0) fuubar (2.2.0)
rspec-core (~> 3.0) rspec-core (~> 3.0)
ruby-progressbar (~> 1.4) ruby-progressbar (~> 1.4)
@ -463,7 +466,6 @@ GEM
websocket-driver (0.6.5) websocket-driver (0.6.5)
websocket-extensions (>= 0.1.0) websocket-extensions (>= 0.1.0)
websocket-extensions (0.1.2) websocket-extensions (0.1.2)
whatlanguage (1.0.6)
xpath (2.0.0) xpath (2.0.0)
nokogiri (~> 1.3) nokogiri (~> 1.3)
@ -484,6 +486,7 @@ DEPENDENCIES
capistrano-rbenv capistrano-rbenv
capistrano-yarn capistrano-yarn
capybara capybara
cld2
devise devise
devise-two-factor devise-two-factor
doorkeeper doorkeeper
@ -549,7 +552,6 @@ DEPENDENCIES
uglifier (>= 1.3.0) uglifier (>= 1.3.0)
webmock webmock
webpacker (~> 1.2) webpacker (~> 1.2)
whatlanguage
RUBY VERSION RUBY VERSION
ruby 2.4.1p111 ruby 2.4.1p111

+ 13
- 1
app/lib/language_detector.rb View File

@ -9,11 +9,23 @@ class LanguageDetector
end end
def to_iso_s def to_iso_s
WhatLanguage.new(:all).language_iso(text_without_urls) || default_locale.to_sym
detected_language_code || default_locale.to_sym
end end
private private
# Language code from the detection result, converted to a Symbol.
# Returns nil when the detection is not flagged as reliable, so the caller
# can fall back to another value.
def detected_language_code
  return unless detected_language_reliable?

  detected_language[:code].to_sym
end
# Result of CLD.detect_language for text_without_urls.
# The result is cached in @_detected_language, so the detector is invoked
# at most once per instance as long as it returns a truthy value.
def detected_language
  return @_detected_language if @_detected_language

  @_detected_language = CLD.detect_language(text_without_urls)
end
# Value of the :reliable entry in the detection result; used to decide
# whether the detected language code should be trusted.
def detected_language_reliable?
  detection = detected_language
  detection[:reliable]
end
def text_without_urls def text_without_urls
text.dup.tap do |new_text| text.dup.tap do |new_text|
URI.extract(new_text).each do |url| URI.extract(new_text).each do |url|

+ 15
- 9
spec/lib/language_detector_spec.rb View File

@ -3,11 +3,17 @@ require 'rails_helper'
describe LanguageDetector do describe LanguageDetector do
describe 'to_iso_s' do describe 'to_iso_s' do
it 'detects english language' do
string = 'Hello and welcome to mastodon'
result = described_class.new(string).to_iso_s
expect(result).to eq :en
it 'detects english language for basic strings' do
strings = [
"Hello and welcome to mastodon",
"I'd rather not!",
"a lot of people just want to feel righteous all the time and that's all that matters",
]
strings.each do |string|
result = described_class.new(string).to_iso_s
expect(result).to eq(:en), string
end
end end
it 'detects spanish language' do it 'detects spanish language' do
@ -19,15 +25,15 @@ describe LanguageDetector do
describe 'when language can\'t be detected' do describe 'when language can\'t be detected' do
it 'confirm language engine cant detect' do it 'confirm language engine cant detect' do
result = WhatLanguage.new(:all).language_iso('')
expect(result).to be_nil
result = CLD.detect_language('')
expect(result[:reliable]).to be false
end end
describe 'because of a URL' do describe 'because of a URL' do
it 'uses default locale when sent just a URL' do it 'uses default locale when sent just a URL' do
string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4' string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4'
wl_result = WhatLanguage.new(:all).language_iso(string)
expect(wl_result).not_to eq :en
cld_result = CLD.detect_language(string)[:code]
expect(cld_result).not_to eq :en
result = described_class.new(string).to_iso_s result = described_class.new(string).to_iso_s

Loading…
Cancel
Save