# ---------------------------------------------------------------------------
# File: lib/microsoft_translate_client.rb
# ---------------------------------------------------------------------------
require "translator-text"
# NOTE(review): including at the top level mixes SentencesParser into Object,
# so every object in the process gains these helpers. It is kept unchanged
# because other code may rely on it; consider moving the include inside
# MicrosoftTranslateClient.
include SentencesParser

# Translates lists of texts through TranslatorText::Client.
#
# When the combined size of the texts fits in CHARACTERS_LIMIT_PER_REQUEST
# they are sent in a single request. Otherwise every text is translated on
# its own, and texts that are still too long are cut into fragments that are
# translated separately and glued back together.
class MicrosoftTranslateClient
  # Maximum number of characters sent in one request.
  CHARACTERS_LIMIT_PER_REQUEST = 5000
  # Placeholder sent instead of nil values; a translation equal to it
  # (ignoring case) is mapped back to nil.
  # See https://docs.microsoft.com/es-es/azure/cognitive-services/translator/prevent-translation
  PREVENTING_TRANSLATION_KEY = "notranslate".freeze

  # Builds the underlying client with the API key stored in the Rails secrets.
  def initialize
    api_key = Rails.application.secrets.microsoft_api_key
    @client = TranslatorText::Client.new(api_key)
  end

  # Translates every value in +fields_values+ into +locale+.
  #
  # @param fields_values [Array<String, nil>] texts to translate; nil entries
  #   are allowed
  # @param locale [Symbol, String] target locale
  # @return [Array<String, nil>] one translation per input value, in the same
  #   order; nil for nil inputs
  def call(fields_values, locale)
    texts = prepare_texts(fields_values)
    # NOTE(review): parse_locale is not defined in this file or in
    # SentencesParser; it must be provided elsewhere -- confirm.
    valid_locale = parse_locale(locale)
    request_translation(texts, valid_locale)
  end

  # Cuts +text+ into fragments that each fit in one request.
  #
  # The text is split from its end: the tail fragment is at most
  # CHARACTERS_LIMIT_PER_REQUEST characters long because the split position is
  # never earlier than text.size - CHARACTERS_LIMIT_PER_REQUEST, and the head
  # is split again recursively until it fits.
  #
  # @param text [String]
  # @return [Array<String>] fragments that concatenate back to +text+
  def fragments_for(text)
    return [text] if text.size <= CHARACTERS_LIMIT_PER_REQUEST

    split_position = detect_split_position(text)
    start_text = text[0..split_position]
    end_text = text[split_position + 1..text.size]

    fragments_for(start_text) + [end_text]
  end

  private

    # Sends +texts+ in one request when they fit in the limit, otherwise
    # translates each text separately. Returns one value per text.
    def request_translation(texts, locale)
      # Nothing to translate: skip the API call.
      return [] if texts.empty?

      if characters_count(texts) <= CHARACTERS_LIMIT_PER_REQUEST
        @client.translate(texts, to: locale).map { |object| get_field_value(object) }
      else
        texts.map { |text| build_translation(translate_text(text, locale)) }
      end
    end

    # Translates one text, fragment by fragment, and returns the response
    # objects of all its fragments in order.
    def translate_text(text, locale)
      fragments_for(text).flat_map do |fragment|
        @client.translate([fragment], to: locale)
      end
    end

    # Joins the translated fragments of a single text. Returns nil when no
    # fragment produced a value (a nil input sent as the placeholder), so the
    # result matches what the single-request path returns for nil inputs.
    def build_translation(objects)
      values = objects.map { |object| get_field_value(object) }
      return nil if values.compact.empty?

      values.join
    end

    # Extracts the translated text from one response object, mapping the
    # placeholder back to nil.
    def get_field_value(object)
      text = object.translations.first.text
      notranslate?(text) ? nil : text
    end

    # Replaces nil values with the placeholder so that every entry sent to the
    # client is a string.
    def prepare_texts(texts)
      texts.map { |text| text || PREVENTING_TRANSLATION_KEY }
    end

    # True when +text+ is the placeholder, ignoring case (the spec shows it
    # coming back capitalised as "Notranslate").
    def notranslate?(text)
      text.downcase == PREVENTING_TRANSLATION_KEY
    end
end

# ---------------------------------------------------------------------------
# File: lib/sentences_parser.rb
# ---------------------------------------------------------------------------

# Helpers used by MicrosoftTranslateClient to decide where a long text is cut.
module SentencesParser
  # Returns the index of the last character of the head fragment of +text+.
  #
  # The position is searched from text.size - CHARACTERS_LIMIT_PER_REQUEST
  # onwards, so that what remains after the split fits in one request. The
  # first "." in that window wins, then the first " "; when there is neither,
  # the text is cut at the start of the window.
  #
  # @param text [String]
  # @return [Integer]
  def detect_split_position(text)
    limit = MicrosoftTranslateClient::CHARACTERS_LIMIT_PER_REQUEST
    minimum_valid_index = [text.size - limit, 0].max
    # The final character is left out of the search: splitting on it would
    # leave an empty tail and a head equal to the whole text, which made
    # fragments_for recurse without end.
    candidates = text[minimum_valid_index...-1]

    get_split_position(candidates.index("."), candidates.index(" "), minimum_valid_index)
  end

  # Turns the offsets found inside the search window into an index of the
  # full text.
  #
  # @param valid_point [Integer, nil] offset of the first "." in the window
  # @param valid_whitespace [Integer, nil] offset of the first " " in the window
  # @param minimum_valid_index [Integer] index where the window starts
  # @return [Integer]
  def get_split_position(valid_point, valid_whitespace, minimum_valid_index)
    # 0 is truthy in Ruby, so an offset of 0 is still honoured here.
    minimum_valid_index + (valid_point || valid_whitespace || 0)
  end

  # Total number of characters in +texts+ (0 for an empty list).
  #
  # @param texts [Array<String>]
  # @return [Integer]
  def characters_count(texts)
    texts.sum(&:size)
  end
end

# ---------------------------------------------------------------------------
# File: spec/lib/microsoft_translate_client_spec.rb
# ---------------------------------------------------------------------------
require "rails_helper"

describe MicrosoftTranslateClient do

  let(:microsoft_client) { described_class.new }

  describe "#call" do

    context "when characters from request are less than the characters limit" do
      it "response has the expected result" do
        response = create_response("Nuevo título", "Nueva descripción")

        expect_any_instance_of(TranslatorText::Client).to receive(:translate).and_return(response)

        result = microsoft_client.call(["New title", "New description"], :es)

        expect(result).to eq(["Nuevo título", "Nueva descripción"])
      end

      it "response nil has the expected result when request has nil value" do
        response = create_response("Notranslate", "Nueva descripción")

        expect_any_instance_of(TranslatorText::Client).to receive(:translate).and_return(response)

        result = microsoft_client.call([nil, "New description"], :es)

        expect(result).to eq([nil, "Nueva descripción"])
      end

    end

    context "when characters from request are greater than characters limit" do
      it "response has the expected result when the request has 2 texts, where both less than CHARACTERS_LIMIT_PER_REQUEST" do
        stub_const("MicrosoftTranslateClient::CHARACTERS_LIMIT_PER_REQUEST", 20)
        text_en = Faker::Lorem.characters(11)
        another_text_en = Faker::Lorem.characters(11)

        translated_text_es = Faker::Lorem.characters(11)
        another_translated_text_es = Faker::Lorem.characters(11)
        response_text = create_response(translated_text_es)
        response_another_text = create_response(another_translated_text_es)

        expect_any_instance_of(TranslatorText::Client).to receive(:translate).exactly(1)
                                                                             .times
                                                                             .and_return(response_text)
        expect_any_instance_of(TranslatorText::Client).to receive(:translate).exactly(1)
                                                                             .times
                                                                             .and_return(response_another_text)

        result = microsoft_client.call([text_en, another_text_en], :es)

        expect(result).to eq([translated_text_es, another_translated_text_es])
      end

      it "response has the expected result when the request has 2 texts and both are greater than CHARACTERS_LIMIT_PER_REQUEST" do
        stub_const("MicrosoftTranslateClient::CHARACTERS_LIMIT_PER_REQUEST", 20)
        start_text_en = Faker::Lorem.characters(10) + " "
        end_text_en = Faker::Lorem.characters(10)
        text_en = start_text_en + end_text_en

        start_translated_text_es = Faker::Lorem.characters(10) + " "
        end_translated_text_es = Faker::Lorem.characters(10)
        translated_text_es = start_translated_text_es + end_translated_text_es
        response_start_text = create_response(start_translated_text_es)
        response_end_text = create_response(end_translated_text_es)

        expect_any_instance_of(TranslatorText::Client).to receive(:translate).with([start_text_en], to: :es)
                                                                             .exactly(1)
                                                                             .times
                                                                             .and_return(response_start_text)
        expect_any_instance_of(TranslatorText::Client).to receive(:translate).with([end_text_en], to: :es)
                                                                             .exactly(1)
                                                                             .times
                                                                             .and_return(response_end_text)

        start_another_text_en = Faker::Lorem.characters(12) + "."
        end_another_text_en = Faker::Lorem.characters(12)
        another_text_en = start_another_text_en + end_another_text_en

        another_start_translated_text_es = Faker::Lorem.characters(12) + "."
        another_end_translated_text_es = Faker::Lorem.characters(12)
        another_translated_text_es = another_start_translated_text_es + another_end_translated_text_es
        response_another_start_text = create_response(another_start_translated_text_es)
        response_another_end_text = create_response(another_end_translated_text_es)

        expect_any_instance_of(TranslatorText::Client).to receive(:translate).with([start_another_text_en], to: :es)
                                                                             .exactly(1)
                                                                             .times
                                                                             .and_return(response_another_start_text)
        expect_any_instance_of(TranslatorText::Client).to receive(:translate).with([end_another_text_en], to: :es)
                                                                             .exactly(1)
                                                                             .times
                                                                             .and_return(response_another_end_text)

        result = microsoft_client.call([text_en, another_text_en], :es)

        expect(result).to eq([translated_text_es, another_translated_text_es])
      end

      it "response is nil for a nil value when every text is translated in its own request" do
        stub_const("MicrosoftTranslateClient::CHARACTERS_LIMIT_PER_REQUEST", 20)
        text_en = Faker::Lorem.characters(15)
        translated_text_es = Faker::Lorem.characters(15)
        response_placeholder = create_response("Notranslate")
        response_text = create_response(translated_text_es)

        expect_any_instance_of(TranslatorText::Client).to receive(:translate).with(["notranslate"], to: :es)
                                                                             .exactly(1)
                                                                             .times
                                                                             .and_return(response_placeholder)
        expect_any_instance_of(TranslatorText::Client).to receive(:translate).with([text_en], to: :es)
                                                                             .exactly(1)
                                                                             .times
                                                                             .and_return(response_text)

        result = microsoft_client.call([nil, text_en], :es)

        expect(result).to eq([nil, translated_text_es])
      end

    end

  end

  describe "#fragments_for" do

    context "text has less characters than characters limit" do
      it "does not split the text" do
        stub_const("MicrosoftTranslateClient::CHARACTERS_LIMIT_PER_REQUEST", 20)
        text_to_translate = Faker::Lorem.characters(10)

        result = microsoft_client.fragments_for(text_to_translate)

        expect(result).to eq [text_to_translate]
      end
    end

    context "text has more characters than characters limit" do
      it "to split text by first valid dot when there is a dot for split" do
        stub_const("MicrosoftTranslateClient::CHARACTERS_LIMIT_PER_REQUEST", 20)
        start_text = Faker::Lorem.characters(10) + "."
        end_text = Faker::Lorem.characters(10)
        text_to_translate = start_text + end_text

        result = microsoft_client.fragments_for(text_to_translate)

        expect(result).to eq([start_text, end_text])
      end

      it "to split text by first valid space when there is not a dot for split but there is a space" do
        stub_const("MicrosoftTranslateClient::CHARACTERS_LIMIT_PER_REQUEST", 20)
        start_text = Faker::Lorem.characters(10) + " "
        end_text = Faker::Lorem.characters(10)
        text_to_translate = start_text + end_text

        result = microsoft_client.fragments_for(text_to_translate)

        expect(result).to eq([start_text, end_text])
      end

      it "to split text in the middle of a word when there are not valid dots and spaces" do
        stub_const("MicrosoftTranslateClient::CHARACTERS_LIMIT_PER_REQUEST", 40)
        sub_part_text_1 = Faker::Lorem.characters(5) + " ."
        sub_part_text_2 = Faker::Lorem.characters(5)
        sub_part_text_3 = Faker::Lorem.characters(9)
        sub_part_text_4 = Faker::Lorem.characters(30)
        text_to_translate = sub_part_text_1 + sub_part_text_2 + sub_part_text_3 + sub_part_text_4

        result = microsoft_client.fragments_for(text_to_translate)

        expect(result).to eq([sub_part_text_1 + sub_part_text_2, sub_part_text_3 + sub_part_text_4])
      end

      # Regression: a dot in the very last position used to be chosen as the
      # split point, which left an empty tail and recursed without end.
      it "does not split at a separator that is the last character of the text" do
        stub_const("MicrosoftTranslateClient::CHARACTERS_LIMIT_PER_REQUEST", 20)
        start_text = Faker::Lorem.characters(10) + " "
        end_text = Faker::Lorem.characters(10) + "."
        text_to_translate = start_text + end_text

        result = microsoft_client.fragments_for(text_to_translate)

        expect(result).to eq([start_text, end_text])
      end

    end
  end
end

# Builds a stand-in for the array returned by TranslatorText::Client#translate:
# one entry per given text, each exposing +translations+ as a one-element
# array whose item responds to +text+. Those are the only attributes
# MicrosoftTranslateClient reads.
def create_response(*args)
  translations = Struct.new(:translations)
  text = Struct.new(:text)

  args.map { |text_to_response| translations.new([text.new(text_to_response)]) }
end