From d38d537beebd1efe61778b2a26ecab0bed84d1c1 Mon Sep 17 00:00:00 2001 From: William Pitcock Date: Sun, 24 Feb 2019 19:13:46 +0000 Subject: [PATCH 1/4] rich media: don't crawl bogus URIs --- lib/pleroma/web/rich_media/helpers.ex | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex index abb1cf7f2..fc9cbc868 100644 --- a/lib/pleroma/web/rich_media/helpers.ex +++ b/lib/pleroma/web/rich_media/helpers.ex @@ -8,10 +8,17 @@ defmodule Pleroma.Web.RichMedia.Helpers do alias Pleroma.HTML alias Pleroma.Web.RichMedia.Parser + defp validate_page_url(nil), do: :error + defp validate_page_url(%URI{authority: nil}), do: :error + defp validate_page_url(%URI{scheme: nil}), do: :error + defp validate_page_url(%URI{}), do: :ok + defp validate_page_url(page_url), do: URI.parse(page_url) |> validate_page_url + def fetch_data_for_activity(%Activity{} = activity) do with true <- Pleroma.Config.get([:rich_media, :enabled]), %Object{} = object <- Object.normalize(activity.data["object"]), {:ok, page_url} <- HTML.extract_first_external_url(object, object.data["content"]), + :ok <- validate_page_url(page_url), {:ok, rich_media} <- Parser.parse(page_url) do %{page_url: page_url, rich_media: rich_media} else From d7a278a733616d01ee41c3923a3d87730c685879 Mon Sep 17 00:00:00 2001 From: William Pitcock Date: Sun, 24 Feb 2019 19:39:27 +0000 Subject: [PATCH 2/4] tests: add tests for rich media helper functions --- test/web/rich_media/helpers_test.exs | 42 ++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 test/web/rich_media/helpers_test.exs diff --git a/test/web/rich_media/helpers_test.exs b/test/web/rich_media/helpers_test.exs new file mode 100644 index 000000000..9285f078d --- /dev/null +++ b/test/web/rich_media/helpers_test.exs @@ -0,0 +1,42 @@ +defmodule Pleroma.Web.RichMedia.HelpersTest do + use Pleroma.DataCase + + alias Pleroma.Web.CommonAPI + + import Pleroma.Factory + import Tesla.Mock + + setup do + mock(fn env -> apply(HttpRequestMock, :request, [env]) end) + :ok + end + + test "refuses to crawl incomplete URLs" do + user = insert(:user) + + {:ok, activity} = + CommonAPI.post(user, %{ + "status" => "[test](example.com/ogp)", + "content_type" => "text/markdown" + }) + + assert %{} == Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) + end + + test "crawls valid, complete URLs" do + user = insert(:user) + + {:ok, activity} = + CommonAPI.post(user, %{ + "status" => "[test](http://example.com/ogp)", + "content_type" => "text/markdown" + }) + + Pleroma.Config.put([:rich_media, :enabled], true) + + assert %{page_url: "http://example.com/ogp", rich_media: _} = + Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) + + Pleroma.Config.put([:rich_media, :enabled], false) + end +end From 9f3cb38012281c596d1aa8c479f07362fa58dacb Mon Sep 17 00:00:00 2001 From: William Pitcock Date: Sat, 2 Mar 2019 12:22:02 +0000 Subject: [PATCH 3/4] helpers: use AutoLinker to validate URIs as well as the other tests --- lib/pleroma/web/rich_media/helpers.ex | 5 ++++- test/web/rich_media/helpers_test.exs | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex index fc9cbc868..ba57171d3 100644 --- a/lib/pleroma/web/rich_media/helpers.ex +++ b/lib/pleroma/web/rich_media/helpers.ex @@ -12,7 +12,10 @@ defmodule Pleroma.Web.RichMedia.Helpers do defp validate_page_url(%URI{authority: nil}), do: :error defp validate_page_url(%URI{scheme: nil}), do: :error defp validate_page_url(%URI{}), do: :ok - defp validate_page_url(page_url), do: URI.parse(page_url) |> validate_page_url + + defp validate_page_url(page_url) do + AutoLinker.Parser.is_url?(page_url, true) && URI.parse(page_url) |> validate_page_url + end def fetch_data_for_activity(%Activity{} = activity) do with true <- Pleroma.Config.get([:rich_media, :enabled]), diff --git a/test/web/rich_media/helpers_test.exs b/test/web/rich_media/helpers_test.exs index 9285f078d..60d93768f 100644 --- a/test/web/rich_media/helpers_test.exs +++ b/test/web/rich_media/helpers_test.exs @@ -20,7 +20,27 @@ defmodule Pleroma.Web.RichMedia.HelpersTest do "content_type" => "text/markdown" }) + Pleroma.Config.put([:rich_media, :enabled], true) + assert %{} == Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) + + Pleroma.Config.put([:rich_media, :enabled], false) + end + + test "refuses to crawl malformed URLs" do + user = insert(:user) + + {:ok, activity} = + CommonAPI.post(user, %{ + "status" => "[test](example.com[]/ogp)", + "content_type" => "text/markdown" + }) + + Pleroma.Config.put([:rich_media, :enabled], true) + + assert %{} == Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) + + Pleroma.Config.put([:rich_media, :enabled], false) end test "crawls valid, complete URLs" do From b7aa1ea9e6ea919b4eab2762b56f7e4b09300189 Mon Sep 17 00:00:00 2001 From: William Pitcock Date: Mon, 4 Mar 2019 18:38:23 +0000 Subject: [PATCH 4/4] rich media: helpers: rework validate_page_url() --- lib/pleroma/web/rich_media/helpers.ex | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex index ba57171d3..8317a1162 100644 --- a/lib/pleroma/web/rich_media/helpers.ex +++ b/lib/pleroma/web/rich_media/helpers.ex @@ -8,14 +8,18 @@ defmodule Pleroma.Web.RichMedia.Helpers do alias Pleroma.HTML alias Pleroma.Web.RichMedia.Parser - defp validate_page_url(nil), do: :error + defp validate_page_url(page_url) when is_binary(page_url) do + if AutoLinker.Parser.is_url?(page_url, true) do + URI.parse(page_url) |> validate_page_url + else + :error + end + end + defp validate_page_url(%URI{authority: nil}), do: :error defp validate_page_url(%URI{scheme: nil}), do: :error defp validate_page_url(%URI{}), do: :ok - - defp validate_page_url(page_url) do - AutoLinker.Parser.is_url?(page_url, true) && URI.parse(page_url) |> validate_page_url - end + defp validate_page_url(_), do: :error def fetch_data_for_activity(%Activity{} = activity) do with true <- Pleroma.Config.get([:rich_media, :enabled]),