From c092fc9fd64c71dd968c48e28685c470d7d6afd9 Mon Sep 17 00:00:00 2001
From: ilja
Date: Mon, 19 Dec 2022 13:06:39 +0000
Subject: [PATCH] Add translation module for Argos Translate (#351)

Argos Translate is a Python module for translation and can be used as a command line tool.
This is also the engine for LibreTranslate, for which we already have a module.
Here we can use the engine directly from our server without doing requests to a third party or having to install our own LibreTranslate webservice (obviously you do have to install Argos Translate).

One thing that's currently still missing from Argos Translate is auto-detection of languages (see <https://github.com/argosopentech/argos-translate/issues/9>). For now, when no source language is provided, we just return the text unchanged, supposedly translated from the target language. That way you get a near immediate response in pleroma-fe when clicking Translate, after which you can select the source language from a dropdown.

Argos Translate also doesn't seem to handle html very well. Therefore we give admins the option to strip the html before translating. I made this an option because I'm unsure if/how this will change in the future.

Co-authored-by: ilja
Reviewed-on: https://akkoma.dev/AkkomaGang/akkoma/pulls/351
Co-authored-by: ilja
Co-committed-by: ilja
---
Review changes on top of the original commit:
- languages/0 skips `argospm list` output lines that are not language packages
  instead of raising on them (stderr is merged into stdout).
- The test module no longer runs async, because Mock patches modules globally.

 CHANGELOG.md                                  |   1 +
 config/config.exs                             |   5 +
 config/description.exs                        |  25 ++++
 docs/docs/configuration/cheatsheet.md         |  10 +-
 .../akkoma/translators/argos_translate.ex     | 133 ++++++++++++++++++
 .../translators/argos_translate_test.exs      | 105 ++++++++++++++
 6 files changed, 277 insertions(+), 2 deletions(-)
 create mode 100644 lib/pleroma/akkoma/translators/argos_translate.ex
 create mode 100644 test/pleroma/akkoma/translators/argos_translate_test.exs

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5019fc2f2..3930a25ec 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 ### Added
 - Prometheus metrics exporting from `/api/v1/akkoma/metrics`
 - Ability to alter http pool size
+- Translation of statuses via ArgosTranslate
 
 ### Removed
 - Non-finch HTTP adapters
diff --git a/config/config.exs b/config/config.exs
index 48290fb05..0611f7c26 100644
--- a/config/config.exs
+++ b/config/config.exs
@@ -882,6 +882,11 @@ config :pleroma, :libre_translate,
   url: "http://127.0.0.1:5000",
   api_key: nil
 
+config :pleroma, :argos_translate,
+  command_argos_translate: "argos-translate",
+  command_argospm: "argospm",
+  strip_html: true
+
 # Import environment specific config. This must remain at the bottom
 # of this file so it overrides the configuration defined above.
 import_config "#{Mix.env()}.exs"
diff --git a/config/description.exs b/config/description.exs
index c43e25468..5d7ea7d25 100644
--- a/config/description.exs
+++ b/config/description.exs
@@ -3442,5 +3442,30 @@ config :pleroma, :config_description, [
         suggestion: [nil]
       }
     ]
+  },
+  %{
+    group: :pleroma,
+    key: :argos_translate,
+    type: :group,
+    description: "ArgosTranslate Settings.",
+    children: [
+      %{
+        key: :command_argos_translate,
+        type: :string,
+        description: "command for `argos-translate`. Can be the command if it's in your PATH, or the full path to the file.",
+        suggestion: ["argos-translate"]
+      },
+      %{
+        key: :command_argospm,
+        type: :string,
+        description: "command for `argospm`. Can be the command if it's in your PATH, or the full path to the file.",
+        suggestion: ["argospm"]
+      },
+      %{
+        key: :strip_html,
+        type: :boolean,
+        description: "Strip html from the post before translating it."
+      }
+    ]
   }
 ]
diff --git a/docs/docs/configuration/cheatsheet.md b/docs/docs/configuration/cheatsheet.md
index 22fc4ecbe..4e84b9a44 100644
--- a/docs/docs/configuration/cheatsheet.md
+++ b/docs/docs/configuration/cheatsheet.md
@@ -1119,7 +1119,7 @@ Each job has these settings:
 ### Translation Settings
 
 Settings to automatically translate statuses for end users. Currently supported
-translation services are DeepL and LibreTranslate.
+translation services are DeepL and LibreTranslate. The supported command line tool is [Argos Translate](https://github.com/argosopentech/argos-translate).
 
 Translations are available at `/api/v1/statuses/:id/translations/:language`, where
 `language` is the target language code (e.g `en`)
@@ -1128,7 +1128,7 @@ Translations are available at `/api/v1/statuses/:id/translations/:language`, whe
 
 - `:enabled` - enables translation
 - `:module` - Sets module to be used
-  - Either `Pleroma.Akkoma.Translators.DeepL` or `Pleroma.Akkoma.Translators.LibreTranslate`
+  - Either `Pleroma.Akkoma.Translators.DeepL`, `Pleroma.Akkoma.Translators.LibreTranslate`, or `Pleroma.Akkoma.Translators.ArgosTranslate`
 
 ### `:deepl`
 
@@ -1140,3 +1140,9 @@ Translations are available at `/api/v1/statuses/:id/translations/:language`, whe
 
 - `:url` - URL of LibreTranslate instance
 - `:api_key` - API key for LibreTranslate
+
+### `:argos_translate`
+
+- `:command_argos_translate` - command for `argos-translate`. Can be the command if it's in your PATH, or the full path to the file (default: `argos-translate`).
+- `:command_argospm` - command for `argospm`. Can be the command if it's in your PATH, or the full path to the file (default: `argospm`).
+- `:strip_html` - Strip html from the post before translating it (default: `true`).
diff --git a/lib/pleroma/akkoma/translators/argos_translate.ex b/lib/pleroma/akkoma/translators/argos_translate.ex
new file mode 100644
index 000000000..dfec81d0a
--- /dev/null
+++ b/lib/pleroma/akkoma/translators/argos_translate.ex
@@ -0,0 +1,133 @@
+defmodule Pleroma.Akkoma.Translators.ArgosTranslate do
+  @moduledoc """
+  Translator backend that shells out to the Argos Translate command line tools.
+
+  The executables and the optional HTML stripping are configured under
+  `config :pleroma, :argos_translate`.
+  """
+
+  @behaviour Pleroma.Akkoma.Translator
+
+  alias Pleroma.Config
+
+  defp argos_translate do
+    Config.get([:argos_translate, :command_argos_translate])
+  end
+
+  defp argospm do
+    Config.get([:argos_translate, :command_argospm])
+  end
+
+  defp strip_html? do
+    Config.get([:argos_translate, :strip_html])
+  end
+
+  # Runs `argospm list`. System.cmd/3 raises when the executable cannot be
+  # started, so that case is mapped onto the usual {output, exit_status} tuple.
+  defp safe_languages() do
+    try do
+      System.cmd(argospm(), ["list"], stderr_to_stdout: true, parallelism: true)
+    rescue
+      _ -> {"Command #{argospm()} not found", 1}
+    end
+  end
+
+  # Turns one line of `argospm list` output into a list holding a single
+  # [source, target] pair. stderr is merged into stdout, so lines that are not
+  # package names (warnings, log output) yield [] instead of raising.
+  defp parse_language_pair("translate-" <> pair) do
+    case String.split(pair, "_") do
+      [source, target] -> [[source, target]]
+      _ -> []
+    end
+  end
+
+  defp parse_language_pair(_), do: []
+
+  @impl Pleroma.Akkoma.Translator
+  def languages do
+    with {response, 0} <- safe_languages() do
+      langs =
+        response
+        |> String.split("\n", trim: true)
+        |> Enum.flat_map(&parse_language_pair/1)
+
+      source_langs =
+        langs
+        |> Enum.map(fn [l, _] -> %{code: l, name: l} end)
+        |> Enum.uniq()
+
+      dest_langs =
+        langs
+        |> Enum.map(fn [_, l] -> %{code: l, name: l} end)
+        |> Enum.uniq()
+
+      {:ok, source_langs, dest_langs}
+    else
+      {response, _} -> {:error, "ArgosTranslate failed to fetch languages (#{response})"}
+    end
+  end
+
+  # Runs the translation command; like safe_languages/0 it reports a command
+  # that cannot be started as a non-zero exit status.
+  defp safe_translate(string, from_language, to_language) do
+    try do
+      System.cmd(
+        argos_translate(),
+        ["--from-lang", from_language, "--to-lang", to_language, string],
+        stderr_to_stdout: true,
+        parallelism: true
+      )
+    rescue
+      _ -> {"Command #{argos_translate()} not found", 1}
+    end
+  end
+
+  # Replaces paragraph, line break and list item tags with newlines before
+  # the remaining markup is stripped and entities are decoded.
+  defp clean_string(string, true) do
+    string
+    |> String.replace("<p>", "\n")
+    |> String.replace("</p>", "\n")
+    |> String.replace("<br>", "\n")
+    |> String.replace("<br/>", "\n")
+    |> String.replace("<li>", "\n")
+    |> Pleroma.HTML.strip_tags()
+    |> HtmlEntities.decode()
+  end
+
+  defp clean_string(string, _), do: string
+
+  # Encodes entities again and turns newlines back into `<br/>` tags.
+  defp htmlify_response(string, true) do
+    string
+    |> HtmlEntities.encode()
+    |> String.replace("\n", "<br/>")
+  end
+
+  defp htmlify_response(string, _), do: string
+
+  @impl Pleroma.Akkoma.Translator
+  def translate(string, nil, to_language) do
+    # Akkoma's Pleroma-fe expects us to detect the source language automatically.
+    # Argos-translate doesn't have that option (yet?)
+    #     see <https://github.com/argosopentech/argos-translate/issues/9>
+    # For now we return the text unchanged, supposedly translated from the target language.
+    # Afterwards people get the option to overwrite the source language from a dropdown.
+    {:ok, to_language, string}
+  end
+
+  def translate(string, from_language, to_language) do
+    # Argos Translate doesn't properly translate HTML (yet?)
+    # For now we give admins the option to strip the html before translating
+    # Note that we have to add some html back to the response afterwards
+    string = clean_string(string, strip_html?())
+
+    with {translated, 0} <-
+           safe_translate(string, from_language, to_language) do
+      {:ok, from_language, translated |> htmlify_response(strip_html?())}
+    else
+      {response, _} -> {:error, "ArgosTranslate failed to translate (#{response})"}
+    end
+  end
+end
diff --git a/test/pleroma/akkoma/translators/argos_translate_test.exs b/test/pleroma/akkoma/translators/argos_translate_test.exs
new file mode 100644
index 000000000..977df1693
--- /dev/null
+++ b/test/pleroma/akkoma/translators/argos_translate_test.exs
@@ -0,0 +1,105 @@
+defmodule Pleroma.Akkoma.Translators.ArgosTranslateTest do
+  alias Pleroma.Akkoma.Translators.ArgosTranslate
+
+  import Mock
+
+  use Pleroma.DataCase, async: false
+
+  setup do
+    clear_config([:argos_translate, :command_argos_translate], "argos-translate_test")
+    clear_config([:argos_translate, :command_argospm], "argospm_test")
+  end
+
+  test "it lists available languages" do
+    languages =
+      with_mock System, [:passthrough],
+        cmd: fn "argospm_test", ["list"], _ ->
+          {"translate-nl_en\ntranslate-en_nl\ntranslate-ja_en\n", 0}
+        end do
+        ArgosTranslate.languages()
+      end
+
+    assert {:ok, source_langs, dest_langs} = languages
+
+    assert [%{code: "en", name: "en"}, %{code: "ja", name: "ja"}, %{code: "nl", name: "nl"}] =
+             source_langs |> Enum.sort()
+
+    assert [%{code: "en", name: "en"}, %{code: "nl", name: "nl"}] = dest_langs |> Enum.sort()
+  end
+
+  test "it ignores lines in the argospm output that are not language packages" do
+    languages =
+      with_mock System, [:passthrough],
+        cmd: fn "argospm_test", ["list"], _ ->
+          {"a warning written to stderr\ntranslate-nl_en\n", 0}
+        end do
+        ArgosTranslate.languages()
+      end
+
+    assert {:ok, [%{code: "nl", name: "nl"}], [%{code: "en", name: "en"}]} = languages
+  end
+
+  test "it translates from the to language when no language is set and returns the text unchanged" do
+    assert {:ok, "nl", "blabla"} = ArgosTranslate.translate("blabla", nil, "nl")
+  end
+
+  test "it translates from the provided language if provided" do
+    translation_response =
+      with_mock System, [:passthrough],
+        cmd: fn "argos-translate_test", ["--from-lang", "nl", "--to-lang", "en", "blabla"], _ ->
+          {"yadayada", 0}
+        end do
+        ArgosTranslate.translate("blabla", "nl", "en")
+      end
+
+    assert {:ok, "nl", "yadayada"} = translation_response
+  end
+
+  test "it returns a proper error when the executable can't be found" do
+    non_existing_command = "sfqsfgqsefd"
+    clear_config([:argos_translate, :command_argos_translate], non_existing_command)
+    clear_config([:argos_translate, :command_argospm], non_existing_command)
+
+    assert nil == System.find_executable(non_existing_command)
+
+    assert {:error, "ArgosTranslate failed to fetch languages" <> _} = ArgosTranslate.languages()
+
+    assert {:error, "ArgosTranslate failed to translate" <> _} =
+             ArgosTranslate.translate("blabla", "nl", "en")
+  end
+
+  test "it can strip html" do
+    content =
+      ~s[<p>What&#39;s up my fellow fedizens?</p><p>So anyway</p><ul><li><a class="hashtag" data-tag="cofe" href="https://suya.space/tag/cofe">#cofe</a></li><li><a class="hashtag" data-tag="suya" href="https://cofe.space/tag/suya">#Suya</a></li></ul><p>ammiright!<br/>:ablobfoxhyper:</p>]
+
+    stripped_content =
+      "\nWhat's up my fellow fedizens?\n\nSo anyway\n\n#cofe\n#Suya\nammiright!\n:ablobfoxhyper:\n"
+
+    expected_response_strip_html =
+      "<br/>What&#39;s up my fellow fedizens?<br/><br/>So anyway<br/><br/>#cofe<br/>#Suya<br/>ammiright!<br/>:ablobfoxhyper:<br/>"
+
+    response_strip_html =
+      with_mock System, [:passthrough],
+        cmd: fn "argos-translate_test",
+                ["--from-lang", _, "--to-lang", _, ^stripped_content],
+                _ ->
+          {stripped_content, 0}
+        end do
+        ArgosTranslate.translate(content, "nl", "en")
+      end
+
+    clear_config([:argos_translate, :strip_html], false)
+
+    response_no_strip_html =
+      with_mock System, [:passthrough],
+        cmd: fn "argos-translate_test", ["--from-lang", _, "--to-lang", _, string], _ ->
+          {string, 0}
+        end do
+        ArgosTranslate.translate(content, "nl", "en")
+      end
+
+    assert {:ok, "nl", content} == response_no_strip_html
+
+    assert {:ok, "nl", expected_response_strip_html} == response_strip_html
+  end
+end