From 7738fbbaf5a6fcd6a10b4ef0a2dcea731a3d4192 Mon Sep 17 00:00:00 2001 From: rinpatch Date: Wed, 15 Jul 2020 15:26:25 +0300 Subject: [PATCH] Connection pool: implement logging and telemetry events --- lib/pleroma/application.ex | 1 + .../gun/connection_pool/worker_supervisor.ex | 44 ++++++++++--- lib/pleroma/telemetry/logger.ex | 62 +++++++++++++++++++ 3 files changed, 100 insertions(+), 7 deletions(-) create mode 100644 lib/pleroma/telemetry/logger.ex diff --git a/lib/pleroma/application.ex b/lib/pleroma/application.ex index cfdaf1770..37fcdf293 100644 --- a/lib/pleroma/application.ex +++ b/lib/pleroma/application.ex @@ -39,6 +39,7 @@ defmodule Pleroma.Application do # every time the application is restarted, so we disable module # conflicts at runtime Code.compiler_options(ignore_module_conflict: true) + Pleroma.Telemetry.Logger.attach() Config.Holder.save_default() Pleroma.HTML.compile_scrubbers() Config.DeprecationWarnings.warn() diff --git a/lib/pleroma/gun/connection_pool/worker_supervisor.ex b/lib/pleroma/gun/connection_pool/worker_supervisor.ex index d090c034e..4b5d10d2a 100644 --- a/lib/pleroma/gun/connection_pool/worker_supervisor.ex +++ b/lib/pleroma/gun/connection_pool/worker_supervisor.ex @@ -18,8 +18,12 @@ defmodule Pleroma.Gun.ConnectionPool.WorkerSupervisor do case DynamicSupervisor.start_child(__MODULE__, {Pleroma.Gun.ConnectionPool.Worker, opts}) do {:error, :max_children} -> case free_pool() do - :ok -> start_worker(opts) - :error -> {:error, :pool_full} + :ok -> + start_worker(opts) + + :error -> + :telemetry.execute([:pleroma, :connection_pool, :provision_failure], %{opts: opts}) + {:error, :pool_full} end res -> @@ -44,6 +48,14 @@ defmodule Pleroma.Gun.ConnectionPool.WorkerSupervisor do |> round |> max(1) + :telemetry.execute([:pleroma, :connection_pool, :reclaim, :start], %{}, %{ + max_connections: max_connections, + reclaim_max: reclaim_max + }) + + # :ets.fun2ms( + # fn {_, {worker_pid, {_, used_by, crf, last_reference}}} when used_by == [] -> + # {worker_pid, crf, last_reference} end) unused_conns = Registry.select( @registry, @@ -55,17 +67,35 @@ defmodule Pleroma.Gun.ConnectionPool.WorkerSupervisor do case unused_conns do [] -> + :telemetry.execute( + [:pleroma, :connection_pool, :reclaim, :stop], + %{reclaimed_count: 0}, + %{ + max_connections: max_connections + } + ) + exit(:no_unused_conns) unused_conns -> - unused_conns - |> Enum.sort(fn {_pid1, crf1, last_reference1}, {_pid2, crf2, last_reference2} -> - crf1 <= crf2 and last_reference1 <= last_reference2 - end) - |> Enum.take(reclaim_max) + reclaimed = + unused_conns + |> Enum.sort(fn {_pid1, crf1, last_reference1}, + {_pid2, crf2, last_reference2} -> + crf1 <= crf2 and last_reference1 <= last_reference2 + end) + |> Enum.take(reclaim_max) + + reclaimed |> Enum.each(fn {pid, _, _} -> DynamicSupervisor.terminate_child(__MODULE__, pid) end) + + :telemetry.execute( + [:pleroma, :connection_pool, :reclaim, :stop], + %{reclaimed_count: Enum.count(reclaimed)}, + %{max_connections: max_connections} + ) end end) diff --git a/lib/pleroma/telemetry/logger.ex b/lib/pleroma/telemetry/logger.ex new file mode 100644 index 000000000..d76dd37b5 --- /dev/null +++ b/lib/pleroma/telemetry/logger.ex @@ -0,0 +1,62 @@ +defmodule Pleroma.Telemetry.Logger do + @moduledoc "Transforms Pleroma telemetry events to logs" + + require Logger + + @events [ + [:pleroma, :connection_pool, :reclaim, :start], + [:pleroma, :connection_pool, :reclaim, :stop], + [:pleroma, :connection_pool, :provision_failure] + ] + def attach do + :telemetry.attach_many("pleroma-logger", @events, &handle_event/4, []) + end + + # Passing anonymous functions instead of strings to logger is intentional, + # that way strings won't be concatenated if the message is going to be thrown + # out anyway due to higher log level configured + + def handle_event( + [:pleroma, :connection_pool, :reclaim, :start], + _, + %{max_connections: max_connections, reclaim_max: reclaim_max}, + _ + ) do + Logger.debug(fn -> + "Connection pool is exhausted (reached #{max_connections} connections). Starting idle connection cleanup to reclaim as much as #{ + reclaim_max + } connections" + end) + end + + def handle_event( + [:pleroma, :connection_pool, :reclaim, :stop], + %{reclaimed_count: 0}, + _, + _ + ) do + Logger.error(fn -> + "Connection pool failed to reclaim any connections due to all of them being in use. It will have to drop requests for opening connections to new hosts" + end) + end + + def handle_event( + [:pleroma, :connection_pool, :reclaim, :stop], + %{reclaimed_count: reclaimed_count}, + _, + _ + ) do + Logger.debug(fn -> "Connection pool cleaned up #{reclaimed_count} idle connections" end) + end + + def handle_event( + [:pleroma, :connection_pool, :provision_failure], + %{opts: [key | _]}, + _, + _ + ) do + Logger.error(fn -> + "Connection pool had to refuse opening a connection to #{key} due to connection limit exhaustion" + end) + end +end