From 3f0440ac3c38b88fe449da9b8281d1dbadfa36d1 Mon Sep 17 00:00:00 2001
From: Sir_Boops
Date: Sun, 15 Apr 2018 17:37:51 -0600
Subject: [PATCH] Dedupe uploads

---
 lib/mix/tasks/sample_config.eex              |   3 +-
 lib/pleroma/upload.ex                        | 116 ++++++++++++++-----
 lib/pleroma/web/activity_pub/activity_pub.ex |   2 +-
 test/upload_test.exs                         |  45 +++----
 4 files changed, 117 insertions(+), 49 deletions(-)

diff --git a/lib/mix/tasks/sample_config.eex b/lib/mix/tasks/sample_config.eex
index e37c864c0..d57591d53 100644
--- a/lib/mix/tasks/sample_config.eex
+++ b/lib/mix/tasks/sample_config.eex
@@ -8,7 +8,8 @@ config :pleroma, :instance,
   name: "<%= name %>",
   email: "<%= email %>",
   limit: 5000,
-  registrations_open: true
+  registrations_open: true,
+  dedupe_media: true
 
 config :pleroma, :media_proxy,
   enabled: false,
diff --git a/lib/pleroma/upload.ex b/lib/pleroma/upload.ex
index e5df94009..ab4bd16f0 100644
--- a/lib/pleroma/upload.ex
+++ b/lib/pleroma/upload.ex
@@ -2,20 +2,21 @@ defmodule Pleroma.Upload do
   alias Ecto.UUID
   alias Pleroma.Web
 
-  def store(%Plug.Upload{} = file) do
-    uuid = UUID.generate()
-    upload_folder = Path.join(upload_path(), uuid)
-    File.mkdir_p!(upload_folder)
-    result_file = Path.join(upload_folder, file.filename)
-    File.cp!(file.path, result_file)
+  def store(%Plug.Upload{} = file, should_dedupe) do
+    content_type = get_content_type(file.path)
+    uuid = get_uuid(file, should_dedupe)
+    name = get_name(file, uuid, content_type, should_dedupe)
+    upload_folder = get_upload_path(uuid, should_dedupe)
+    url_path = get_url(name, uuid, should_dedupe)
 
-    # fix content type on some image uploads
-    content_type =
-      if file.content_type in [nil, "application/octet-stream"] do
-        get_content_type(file.path)
-      else
-        file.content_type
-      end
+    File.mkdir_p!(upload_folder)
+    result_file = Path.join(upload_folder, name)
+
+    if File.exists?(result_file) do
+      File.rm!(file.path)
+    else
+      File.cp!(file.path, result_file)
+    end
 
     %{
       "type" => "Image",
@@ -23,26 +24,48 @@ defmodule Pleroma.Upload do
         %{
           "type" => "Link",
           "mediaType" => content_type,
-          "href" => url_for(Path.join(uuid, :cow_uri.urlencode(file.filename)))
+          "href" => url_path
         }
       ],
-      "name" => file.filename,
-      "uuid" => uuid
+      "name" => name
     }
   end
 
-  def store(%{"img" => "data:image/" <> image_data}) do
+  def store(%{"img" => "data:image/" <> image_data}, should_dedupe) do
     parsed = Regex.named_captures(~r/(?<filetype>jpeg|png|gif);base64,(?<data>.*)/, image_data)
-    data = Base.decode64!(parsed["data"])
+    data = Base.decode64!(parsed["data"], ignore: :whitespace)
     uuid = UUID.generate()
-    upload_folder = Path.join(upload_path(), uuid)
+    uuidpath = Path.join(upload_path(), uuid)
+    uuid = UUID.generate()
+
+    File.mkdir_p!(upload_path())
+
+    File.write!(uuidpath, data)
+
+    content_type = get_content_type(uuidpath)
+
+    name =
+      create_name(
+        String.downcase(Base.encode16(:crypto.hash(:sha256, data))),
+        parsed["filetype"],
+        content_type
+      )
+
+    upload_folder = get_upload_path(uuid, should_dedupe)
+    url_path = get_url(name, uuid, should_dedupe)
+
     File.mkdir_p!(upload_folder)
-    filename = Base.encode16(:crypto.hash(:sha256, data)) <> ".#{parsed["filetype"]}"
-    result_file = Path.join(upload_folder, filename)
+    result_file = Path.join(upload_folder, name)
 
-    File.write!(result_file, data)
-
-    content_type = "image/#{parsed["filetype"]}"
+    if should_dedupe do
+      if !File.exists?(result_file) do
+        File.rename(uuidpath, result_file)
+      else
+        File.rm!(uuidpath)
+      end
+    else
+      File.rename(uuidpath, result_file)
+    end
 
     %{
       "type" => "Image",
@@ -50,11 +73,10 @@ defmodule Pleroma.Upload do
         %{
           "type" => "Link",
           "mediaType" => content_type,
-          "href" => url_for(Path.join(uuid, :cow_uri.urlencode(filename)))
+          "href" => url_path
         }
       ],
-      "name" => filename,
-      "uuid" => uuid
+      "name" => name
     }
   end
 
@@ -63,6 +85,46 @@ defmodule Pleroma.Upload do
     Keyword.fetch!(settings, :uploads)
   end
 
+  defp create_name(uuid, ext, type) do
+    if type == "application/octet-stream" do
+      String.downcase(Enum.join([uuid, ext], "."))
+    else
+      String.downcase(Enum.join([uuid, List.last(String.split(type, "/"))], "."))
+    end
+  end
+
+  defp get_uuid(file, should_dedupe) do
+    if should_dedupe do
+      Base.encode16(:crypto.hash(:sha256, File.read!(file.path)))
+    else
+      UUID.generate()
+    end
+  end
+
+  defp get_name(file, uuid, type, should_dedupe) do
+    if should_dedupe do
+      create_name(uuid, List.last(String.split(file.filename, ".")), type)
+    else
+      file.filename
+    end
+  end
+
+  defp get_upload_path(uuid, should_dedupe) do
+    if should_dedupe do
+      upload_path()
+    else
+      Path.join(upload_path(), uuid)
+    end
+  end
+
+  defp get_url(name, uuid, should_dedupe) do
+    if should_dedupe do
+      url_for(:cow_uri.urlencode(name))
+    else
+      url_for(Path.join(uuid, :cow_uri.urlencode(name)))
+    end
+  end
+
   defp url_for(file) do
     "#{Web.base_url()}/media/#{file}"
   end
diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex
index 4e0be5ba2..3a03f5fe4 100644
--- a/lib/pleroma/web/activity_pub/activity_pub.ex
+++ b/lib/pleroma/web/activity_pub/activity_pub.ex
@@ -492,7 +492,7 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do
   end
 
   def upload(file) do
-    data = Upload.store(file)
+    data = Upload.store(file, Application.get_env(:pleroma, :instance)[:dedupe_media])
     Repo.insert(%Object{data: data})
   end
 
diff --git a/test/upload_test.exs b/test/upload_test.exs
index d68b3e7ba..645f10293 100644
--- a/test/upload_test.exs
+++ b/test/upload_test.exs
@@ -3,40 +3,45 @@ defmodule Pleroma.UploadTest do
   use Pleroma.DataCase
 
   describe "Storing a file" do
-    test "copies the file to the configured folder" do
+    test "copies the file to the configured folder with deduping" do
+      File.cp!("test/fixtures/image.jpg", "test/fixtures/image_tmp.jpg")
+
       file = %Plug.Upload{
         content_type: "image/jpg",
-        path: Path.absname("test/fixtures/image.jpg"),
+        path: Path.absname("test/fixtures/image_tmp.jpg"),
         filename: "an [image.jpg"
       }
 
-      data = Upload.store(file)
-      assert data["name"] == "an [image.jpg"
+      data = Upload.store(file, true)
 
-      assert List.first(data["url"])["href"] ==
-               "http://localhost:4001/media/#{data["uuid"]}/an%20%5Bimage.jpg"
+      assert data["name"] ==
+               "e7a6d0cf595bff76f14c9a98b6c199539559e8b844e02e51e5efcfd1f614a2df.jpeg"
     end
 
-    test "fixes an incorrect content type" do
+    test "copies the file to the configured folder without deduping" do
+      File.cp!("test/fixtures/image.jpg", "test/fixtures/image_tmp.jpg")
+
+      file = %Plug.Upload{
+        content_type: "image/jpg",
+        path: Path.absname("test/fixtures/image_tmp.jpg"),
+        filename: "an [image.jpg"
+      }
+
+      data = Upload.store(file, false)
+      assert data["name"] == "an [image.jpg"
+    end
+
+    test "fixes incorrect content type" do
+      File.cp!("test/fixtures/image.jpg", "test/fixtures/image_tmp.jpg")
+
       file = %Plug.Upload{
         content_type: "application/octet-stream",
-        path: Path.absname("test/fixtures/image.jpg"),
+        path: Path.absname("test/fixtures/image_tmp.jpg"),
         filename: "an [image.jpg"
       }
 
-      data = Upload.store(file)
+      data = Upload.store(file, true)
       assert hd(data["url"])["mediaType"] == "image/jpeg"
     end
-
-    test "does not modify a valid content type" do
-      file = %Plug.Upload{
-        content_type: "image/png",
-        path: Path.absname("test/fixtures/image.jpg"),
-        filename: "an [image.jpg"
-      }
-
-      data = Upload.store(file)
-      assert hd(data["url"])["mediaType"] == "image/png"
-    end
   end
 end