OTP Basics
Master OTP (Open Telecom Platform) - the framework for building fault-tolerant, scalable applications with supervisors and applications.
What is OTP?
OTP is a set of libraries, design principles, and patterns for building robust systems:
- Supervisors: Manage and restart failed processes
- Applications: Package and configure your system
- Behaviors: GenServer, Supervisor, Application, etc.
- Error handling: "Let it crash" with supervision
- Hot code swapping: Update running systems
Supervisors
Supervisors monitor child processes and restart them when they crash.
Basic Supervisor
defmodule MyApp.Supervisor do
  @moduledoc """
  Module-based supervisor that owns a single `MyApp.Worker` child.
  """

  use Supervisor

  @doc """
  Starts the supervisor and registers it under the module name.

  `init_arg` is handed to `init/1` unchanged.
  """
  def start_link(init_arg),
    do: Supervisor.start_link(__MODULE__, init_arg, name: __MODULE__)

  @impl true
  def init(_init_arg) do
    # One worker, restarted on its own when it fails.
    Supervisor.init([{MyApp.Worker, []}], strategy: :one_for_one)
  end
end
# Start supervisor
# Matching on {:ok, pid} means a failed start raises a MatchError right here
# instead of being silently ignored.
{:ok, pid} = MyApp.Supervisor.start_link([])
Child Specification
# Using default child_spec
# Three shapes are shown: a bare module, {module, keyword_options} and
# {module, list_argument}.
children = [
MyApp.Worker,
{MyApp.Cache, name: :my_cache},
{MyApp.Server, [:arg1, :arg2]}
]
# Custom child spec
# Written out as a plain map: `start` is a {module, function, args} tuple, so
# this one calls MyApp.Worker.start_link([]).
children = [
%{
id: MyApp.Worker,
start: {MyApp.Worker, :start_link, [[]]},
restart: :permanent,
shutdown: 5000,
type: :worker
}
]
# Or using Supervisor.child_spec/2
# The :id override gives each entry a distinct id, which lets the same module
# appear twice in one list.
children = [
Supervisor.child_spec({MyApp.Worker, []}, id: :worker1),
Supervisor.child_spec({MyApp.Worker, []}, id: :worker2)
]
Supervision Strategies
:one_for_one
If a child dies, only that child is restarted:
defmodule MyApp.Supervisor do
  @moduledoc """
  Supervises `Worker1`, `Worker2` and `Worker3` with the `:one_for_one` strategy.
  """

  use Supervisor

  @doc "Starts the supervisor, registered under the module name."
  def start_link, do: Supervisor.start_link(__MODULE__, [], name: __MODULE__)

  @impl true
  def init(_) do
    specs = Enum.map([Worker1, Worker2, Worker3], &{&1, []})

    # If Worker2 crashes, only Worker2 is restarted
    Supervisor.init(specs, strategy: :one_for_one)
  end
end
:one_for_all
If any child dies, all children are terminated and restarted:
# Supervisor callback: the three services form one unit and restart together.
def init(_) do
  services = for mod <- [Database, Cache, WebServer], do: {mod, []}

  # If Database crashes, all three are restarted
  Supervisor.init(services, strategy: :one_for_all)
end
:rest_for_one
If a child dies, that child and all children started after it are restarted:
# Supervisor callback: children are listed in dependency order, earliest first.
def init(_) do
  # 1 Database, 2 Cache (depends on 1), 3 WebServer (depends on 1 and 2)
  ordered = for mod <- [Database, Cache, WebServer], do: {mod, []}

  # If Cache crashes, Cache and WebServer restart
  # If Database crashes, all three restart
  Supervisor.init(ordered, strategy: :rest_for_one)
end
Restart Options
# The restart value is part of the child specification, not of the worker's
# start argument. Writing `{Worker, restart: :permanent}` would only hand
# `[restart: :permanent]` to `Worker.start_link/1`; the restart behaviour
# would stay at its default. Override it with Supervisor.child_spec/2 (or
# declare it once in the worker with `use GenServer, restart: :transient`).

# :permanent - Always restart (default)
Supervisor.child_spec(Worker, restart: :permanent)
# :temporary - Never restart
Supervisor.child_spec(Worker, restart: :temporary)
# :transient - Restart only if abnormal termination
Supervisor.child_spec(Worker, restart: :transient)
Max Restarts
# Allow 3 restarts in 5 seconds
# max_restarts and max_seconds are read together: at most `max_restarts`
# restarts are tolerated within a `max_seconds`-second window.
Supervisor.init(children,
strategy: :one_for_one,
max_restarts: 3,
max_seconds: 5
)
# If exceeded, supervisor itself crashes
Dynamic Supervisors
Add/remove children at runtime:
defmodule MyApp.DynamicSupervisor do
  @moduledoc """
  Supervisor that starts with no children; they are added at runtime with
  `DynamicSupervisor.start_child/2`.
  """

  use DynamicSupervisor

  @doc "Starts the supervisor under the module name; `init_arg` goes to `init/1`."
  def start_link(init_arg),
    do: DynamicSupervisor.start_link(__MODULE__, init_arg, name: __MODULE__)

  @impl true
  def init(_init_arg), do: DynamicSupervisor.init(strategy: :one_for_one)
end
# Start workers dynamically
# `arg` is a placeholder for whatever MyApp.Worker expects as its start argument.
{:ok, pid} = DynamicSupervisor.start_child(
MyApp.DynamicSupervisor,
{MyApp.Worker, arg}
)
# Terminate a child
# The child is identified by the pid returned from start_child/2 above.
DynamicSupervisor.terminate_child(MyApp.DynamicSupervisor, pid)
# Count children
DynamicSupervisor.count_children(MyApp.DynamicSupervisor)
Supervision Trees
Organize supervisors in a hierarchy:
defmodule MyApp.Application do
  @moduledoc """
  Application entry point: the root supervisor starts one supervisor per
  subsystem (database, cache, web).
  """

  use Application

  @impl true
  def start(_type, _args) do
    subsystems = [
      # Database supervisor
      {MyApp.DatabaseSupervisor, []},
      # Cache supervisor
      {MyApp.CacheSupervisor, []},
      # Web supervisor
      {MyApp.WebSupervisor, []}
    ]

    Supervisor.start_link(subsystems, strategy: :one_for_one, name: MyApp.Supervisor)
  end
end
Nested Supervisors
defmodule MyApp.WebSupervisor do
  @moduledoc """
  Supervisor for the web layer: PubSub, the endpoint and presence tracking.
  """

  use Supervisor

  @doc """
  Starts the supervisor, registered under the module name.

  The argument is accepted and ignored. It exists because the child spec
  generated by `use Supervisor` calls `start_link/1`, which is what happens
  when a parent lists this module as `{MyApp.WebSupervisor, []}`. Thanks to the
  default value, `start_link/0` keeps working as before.
  """
  def start_link(_init_arg \\ []) do
    Supervisor.start_link(__MODULE__, [], name: __MODULE__)
  end

  @impl true
  def init(_) do
    children = [
      {Phoenix.PubSub, name: MyApp.PubSub},
      MyApp.Endpoint,
      {MyApp.Presence, []}
    ]

    Supervisor.init(children, strategy: :one_for_one)
  end
end
Applications
Package your code as an OTP application:
Application Module
defmodule MyApp.Application do
  @moduledoc """
  OTP application callback module: boots the top-level supervision tree and
  offers a hook that runs when the application stops.
  """

  use Application

  @impl true
  def start(_type, _args) do
    Supervisor.start_link(
      [
        {MyApp.Repo, []},
        {MyApp.Cache, []},
        {Phoenix.PubSub, name: MyApp.PubSub},
        MyApp.Endpoint
      ],
      strategy: :one_for_one,
      name: MyApp.Supervisor
    )
  end

  # Cleanup before application stops
  @impl true
  def stop(_state), do: :ok
end
mix.exs Configuration
# Names the application callback module and the extra OTP applications this
# project relies on.
def application do
  callback = {MyApp.Application, []}
  extras = [:logger, :runtime_tools]

  [mod: callback, extra_applications: extras]
end
Application Dependencies
# Start applications in order
def application do
  started_first = [:logger]
  # Don't auto-start
  included = [:nested_app]

  [
    mod: {MyApp.Application, []},
    extra_applications: started_first,
    included_applications: included
  ]
end
GenServer in OTP
GenServer is a behavior for building servers in supervision trees:
Full GenServer Example
defmodule MyApp.Counter do
  @moduledoc """
  A named counter process. The server state is the current integer value.
  """

  use GenServer

  # Client API

  @doc """
  Starts the counter under the module name.

  `opts` is a keyword list; `:initial_value` sets the starting count and
  defaults to 0.
  """
  def start_link(opts) do
    start_value = Keyword.get(opts, :initial_value, 0)
    GenServer.start_link(__MODULE__, start_value, name: __MODULE__)
  end

  @doc "Adds one to the counter and returns the new value."
  def increment, do: GenServer.call(__MODULE__, :increment)

  @doc "Returns the current value."
  def get, do: GenServer.call(__MODULE__, :get)

  # Server Callbacks

  @impl true
  def init(count), do: {:ok, count}

  @impl true
  def handle_call(:increment, _from, count) do
    bumped = count + 1
    {:reply, bumped, bumped}
  end

  def handle_call(:get, _from, count), do: {:reply, count, count}
end
Using in Supervision Tree
# The keyword list is passed to MyApp.Counter.start_link/1, which reads
# :initial_value from it.
children = [
{MyApp.Counter, initial_value: 100}
]
Supervisor.init(children, strategy: :one_for_one)
GenServer with Timeout
defmodule MyApp.Session do
  @moduledoc """
  Per-user session process that stops itself after a period of inactivity.

  Every callback return carries the timeout again, so the `:timeout` message
  is only delivered when no message arrives for a full `@timeout` interval.
  """

  use GenServer

  @timeout 60_000 # 1 minute

  @impl true
  def init(user_id) do
    session = %{user_id: user_id, last_activity: System.monotonic_time(:second)}
    {:ok, session, @timeout}
  end

  @impl true
  def handle_call(:get_user, _from, session) do
    touched = %{session | last_activity: System.monotonic_time(:second)}
    {:reply, session.user_id, touched, @timeout}
  end

  # Session expired
  @impl true
  def handle_info(:timeout, session), do: {:stop, :normal, session}
end
Task.Supervisor
Supervise one-off tasks:
defmodule MyApp.Application do
  @moduledoc """
  Application callback module that runs a `Task.Supervisor` registered as
  `MyApp.TaskSupervisor`.
  """

  # `use Application` declares the behaviour and supplies a default stop/1,
  # so this module is a complete application callback module.
  use Application

  @impl true
  def start(_type, _args) do
    children = [
      {Task.Supervisor, name: MyApp.TaskSupervisor}
    ]

    Supervisor.start_link(children, strategy: :one_for_one)
  end
end
# Use it
# Fire-and-forget: the function's result is not collected here.
# process_data/0 is a placeholder for the caller's own work.
Task.Supervisor.start_child(MyApp.TaskSupervisor, fn ->
# Do some work
process_data()
end)
# Async task with supervisor
# async/2 returns a task struct; Task.await/1 blocks until the result of
# fetch_data/0 (also a placeholder) is available.
task = Task.Supervisor.async(MyApp.TaskSupervisor, fn ->
fetch_data()
end)
result = Task.await(task)
Registry
Name processes dynamically:
Basic Registry
defmodule MyApp.Application do
  @moduledoc """
  Application callback module that runs a unique-key `Registry` named
  `MyApp.Registry`.
  """

  # `use Application` declares the behaviour and supplies a default stop/1,
  # so this module is a complete application callback module.
  use Application

  @impl true
  def start(_type, _args) do
    children = [
      {Registry, keys: :unique, name: MyApp.Registry}
    ]

    Supervisor.start_link(children, strategy: :one_for_one)
  end
end
# Register a process
# The :via tuple asks MyApp.Registry to register the new process under the
# key "worker1".
{:ok, pid} = GenServer.start_link(MyWorker, [], name: {:via, Registry, {MyApp.Registry, "worker1"}})
# Look up
# The single-element match raises a MatchError when nothing is registered
# under "worker1".
[{pid, _}] = Registry.lookup(MyApp.Registry, "worker1")
# Send message via registry
# The callback receives the {pid, value} entries found for the key.
Registry.dispatch(MyApp.Registry, "worker1", fn entries ->
for {pid, _} <- entries, do: send(pid, :hello)
end)
DynamicSupervisor with Registry
defmodule MyApp.WorkerSupervisor do
  @moduledoc """
  Dynamic supervisor for `MyApp.Worker` processes, one per worker id.
  """

  use DynamicSupervisor

  @doc "Starts the supervisor under the module name."
  def start_link(init_arg),
    do: DynamicSupervisor.start_link(__MODULE__, init_arg, name: __MODULE__)

  @doc "Starts a `MyApp.Worker` child, passing it `worker_id: worker_id`."
  def start_worker(worker_id),
    do: DynamicSupervisor.start_child(__MODULE__, {MyApp.Worker, worker_id: worker_id})

  @impl true
  def init(_), do: DynamicSupervisor.init(strategy: :one_for_one)
end
defmodule MyApp.Worker do
  @moduledoc """
  Worker process registered in `MyApp.Registry` under its worker id.
  """

  use GenServer

  @doc """
  Starts a worker. `opts` must contain `:worker_id`; a missing key raises
  `KeyError`. The id is both the `init/1` argument and the registry key.
  """
  def start_link(opts) do
    id = Keyword.fetch!(opts, :worker_id)
    registered_name = {:via, Registry, {MyApp.Registry, id}}

    GenServer.start_link(__MODULE__, id, name: registered_name)
  end

  # ... callbacks
end
Practical Examples
Connection Pool
defmodule MyApp.ConnectionPool do
  @moduledoc """
  Fixed-size pool: a supervisor with `pool_size` `MyApp.Connection` children.
  """

  use Supervisor

  @doc "Starts the pool under the module name with `pool_size` connections."
  def start_link(pool_size) do
    Supervisor.start_link(__MODULE__, pool_size, name: __MODULE__)
  end

  @impl true
  def init(pool_size) do
    # The explicit step of 1 matters: a plain `1..0` counts DOWN and would
    # create two connections for a pool size of zero.
    children =
      for i <- 1..pool_size//1 do
        Supervisor.child_spec({MyApp.Connection, []}, id: {:connection, i})
      end

    Supervisor.init(children, strategy: :one_for_one)
  end

  @doc """
  Returns the pid of a randomly chosen running connection.

  Children that are not running are skipped: for those the supervisor reports
  `:restarting` or `:undefined` in place of a pid. Raises `Enum.EmptyError`
  when no connection is currently running.
  """
  def checkout do
    running =
      for {_id, pid, _type, _modules} <- Supervisor.which_children(__MODULE__),
          is_pid(pid),
          do: pid

    Enum.random(running)
  end
end
Worker Pool with Poolboy
# mix.exs
# Project dependencies: poolboy provides the worker pool used below.
defp deps do
  [
    {:poolboy, "~> 1.5"}
  ]
end
# Supervisor
# Pool registered locally as :worker_pool, running MyApp.Worker processes:
# size 5 with a max_overflow of 10.
children = [
:poolboy.child_spec(:worker_pool, [
{:name, {:local, :worker_pool}},
{:worker_module, MyApp.Worker},
{:size, 5},
{:max_overflow, 10}
])
]
# Use pool
# The function receives a worker pid from the pool; `data` is a placeholder
# for the caller's payload.
:poolboy.transaction(:worker_pool, fn pid ->
GenServer.call(pid, {:work, data})
end)
Circuit Breaker
defmodule MyApp.CircuitBreaker do
  @moduledoc """
  Circuit breaker implemented as a named GenServer.

  The wrapped function runs inside the server process. After
  `failure_threshold` failures without an intervening success the breaker
  opens and rejects calls with `{:error, :circuit_open}`. Once `timeout`
  milliseconds have passed, the next call is let through (half-open); a
  success closes the breaker again.
  """

  use GenServer

  defstruct [:failure_threshold, :timeout, failures: 0, state: :closed, opened_at: nil]

  @doc "Starts the breaker. Options: `:failure_threshold` (5), `:timeout` in ms (60_000)."
  def start_link(opts), do: GenServer.start_link(__MODULE__, opts, name: __MODULE__)

  @doc """
  Runs `fun` through the breaker.

  Returns `{:ok, result}`, `{:error, {kind, reason}}` when `fun` raises,
  throws or exits, or `{:error, :circuit_open}` while the breaker is open.
  """
  def call(fun), do: GenServer.call(__MODULE__, {:call, fun})

  @impl true
  def init(opts) do
    breaker = %__MODULE__{
      failure_threshold: Keyword.get(opts, :failure_threshold, 5),
      timeout: Keyword.get(opts, :timeout, 60_000)
    }

    {:ok, breaker}
  end

  @impl true
  def handle_call({:call, fun}, _from, %{state: :open, opened_at: opened_at} = breaker) do
    elapsed = System.monotonic_time(:millisecond) - opened_at

    if elapsed > breaker.timeout do
      # Try half-open
      execute_call(fun, %{breaker | state: :half_open})
    else
      {:reply, {:error, :circuit_open}, breaker}
    end
  end

  def handle_call({:call, fun}, _from, breaker), do: execute_call(fun, breaker)

  # Runs the function; any raise, throw or exit is counted as a failure.
  defp execute_call(fun, breaker) do
    result = fun.()
    {:reply, {:ok, result}, %{breaker | failures: 0, state: :closed}}
  catch
    kind, reason ->
      {:reply, {:error, {kind, reason}}, record_failure(breaker)}
  end

  # Increments the failure count and opens the breaker at the threshold.
  defp record_failure(breaker) do
    count = breaker.failures + 1

    if count >= breaker.failure_threshold do
      %{breaker | failures: count, state: :open, opened_at: System.monotonic_time(:millisecond)}
    else
      %{breaker | failures: count}
    end
  end
end
Graceful Shutdown
defmodule MyApp.Worker do
  @moduledoc """
  Worker that traps exits so `terminate/2` gets a chance to clean up when the
  process is shut down.
  """

  use GenServer

  @doc "Starts the worker, registered under the module name."
  def start_link(opts) do
    GenServer.start_link(__MODULE__, opts, name: __MODULE__)
  end

  @impl true
  def init(_opts) do
    # Without trapping exits, an exit signal from the supervisor would stop
    # the process without terminate/2 being called.
    Process.flag(:trap_exit, true)
    {:ok, %{}}
  end

  @impl true
  def terminate(reason, state) do
    IO.puts("Shutting down: #{inspect(reason)}")
    # Cleanup: close connections, flush buffers, etc.
    cleanup(state)
    :ok
  end

  # Placeholder for real cleanup work. The argument is underscored because
  # nothing reads it yet; this avoids the compiler's unused-variable warning.
  defp cleanup(_state) do
    # Flush pending work
    # Close file handles
    # Disconnect from services
    :ok
  end
end
Hot Code Reloading
defmodule MyApp.Server do
  @moduledoc """
  Shows the `code_change/3` callback, which lets a server migrate its state
  when a new version of the module is loaded.
  """

  use GenServer

  # Migrate state between versions
  @impl true
  def code_change(_old_vsn, state, _extra), do: {:ok, transform_state(state)}

  # Transform state structure for new version
  # Currently the identity function: the state shape has not changed.
  defp transform_state(current), do: current
end
Application Configuration
config/config.exs
# Base configuration for the :myapp application.
import Config
# Ecto repositories that belong to :myapp.
config :myapp,
ecto_repos: [MyApp.Repo]
# Connection settings stored under the MyApp.Repo key of :myapp.
# NOTE(review): credentials are hard-coded here; fine for local development,
# confirm they are overridden elsewhere for other environments.
config :myapp, MyApp.Repo,
database: "myapp_dev",
username: "postgres",
password: "postgres",
hostname: "localhost"
# Loads the file named after the current environment (e.g. "dev.exs").
import_config "#{config_env()}.exs"
Reading Configuration
# In application start
# Reads the repo settings stored under the MyApp.Repo key of :myapp and
# passes them to the repo child.
def start(_type, _args) do
  # fetch_env!/2 raises a descriptive error when the key is missing, instead
  # of handing `nil` to the repo and failing later with an unrelated message.
  db_config = Application.fetch_env!(:myapp, MyApp.Repo)

  children = [
    {MyApp.Repo, db_config}
  ]

  Supervisor.start_link(children, strategy: :one_for_one)
end
Exercises
1. Create a supervisor with 3 workers using the :one_for_one strategy
2. Build a DynamicSupervisor that starts/stops workers by ID
3. Implement a GenServer-based rate limiter that resets every minute
4. Create a simple connection pool supervisor
5. Build a supervision tree with multiple levels (root -> database supervisor -> connections)
# Solutions
# 1. Basic supervisor
defmodule MyApp.Supervisor do
  @moduledoc """
  Supervises three `MyApp.Worker` processes with the `:one_for_one` strategy.
  """

  use Supervisor

  @doc "Starts the supervisor, registered under the module name."
  def start_link do
    Supervisor.start_link(__MODULE__, [], name: __MODULE__)
  end

  @impl true
  def init(_) do
    # `{MyApp.Worker, id: n}` only passes `[id: n]` to the worker as its start
    # argument; the child id would still be MyApp.Worker for all three, and a
    # supervisor refuses to start with duplicate child ids. Each spec therefore
    # gets its own id through Supervisor.child_spec/2.
    children =
      for n <- 1..3 do
        Supervisor.child_spec({MyApp.Worker, id: n}, id: {MyApp.Worker, n})
      end

    Supervisor.init(children, strategy: :one_for_one)
  end
end
# 2. DynamicSupervisor with worker management
defmodule MyApp.DynamicWorkerSupervisor do
  @moduledoc """
  Dynamic supervisor with helpers to start and stop `MyApp.Worker` children.
  """

  use DynamicSupervisor

  @doc "Starts the supervisor, registered under the module name."
  def start_link, do: DynamicSupervisor.start_link(__MODULE__, [], name: __MODULE__)

  @doc "Starts a worker child, passing it `id: id` as its start argument."
  def start_worker(id),
    do: DynamicSupervisor.start_child(__MODULE__, {MyApp.Worker, id: id})

  @doc "Terminates the child identified by `pid`."
  def stop_worker(pid), do: DynamicSupervisor.terminate_child(__MODULE__, pid)

  @impl true
  def init(_), do: DynamicSupervisor.init(strategy: :one_for_one)
end
# 3. Rate limiter with reset
defmodule MyApp.RateLimiter do
  @moduledoc """
  Fixed-window rate limiter: at most `max_requests` successful checks per
  window, with the counter reset by a timer message every 60 seconds.
  """

  use GenServer

  @doc "Starts the limiter under the module name with the given request budget."
  def start_link(max_requests),
    do: GenServer.start_link(__MODULE__, max_requests, name: __MODULE__)

  @doc "Returns `:ok` and consumes one request, or `:rate_limited` when the budget is used up."
  def check, do: GenServer.call(__MODULE__, :check)

  @impl true
  def init(max_requests) do
    schedule_reset()
    {:ok, %{max: max_requests, count: 0}}
  end

  @impl true
  def handle_call(:check, _from, limiter) do
    case limiter.count < limiter.max do
      true -> {:reply, :ok, %{limiter | count: limiter.count + 1}}
      false -> {:reply, :rate_limited, limiter}
    end
  end

  @impl true
  def handle_info(:reset, limiter) do
    # Re-arm the timer first so the next window is scheduled right away.
    schedule_reset()
    {:noreply, %{limiter | count: 0}}
  end

  # Sends :reset to this process after one minute.
  defp schedule_reset, do: Process.send_after(self(), :reset, 60_000)
end
# 4. Connection pool
defmodule MyApp.PoolSupervisor do
  @moduledoc """
  Supervisor that runs `size` `MyApp.Connection` children.
  """

  use Supervisor

  @doc "Starts the pool supervisor under the module name with `size` connections."
  def start_link(size) do
    Supervisor.start_link(__MODULE__, size, name: __MODULE__)
  end

  @impl true
  def init(size) do
    # The explicit step of 1 matters: a plain `1..0` counts DOWN and would
    # create two connections for a size of zero.
    children =
      for i <- 1..size//1 do
        Supervisor.child_spec({MyApp.Connection, []}, id: {:conn, i})
      end

    Supervisor.init(children, strategy: :one_for_one)
  end
end
# 5. Multi-level supervision tree
defmodule MyApp.Application do
  @moduledoc """
  Root of a multi-level tree: the root supervisor starts
  `MyApp.DatabaseSupervisor`, which in turn supervises its own children.
  """

  use Application

  @impl true
  def start(_type, _args) do
    Supervisor.start_link(
      [{MyApp.DatabaseSupervisor, []}],
      strategy: :one_for_one,
      name: MyApp.RootSupervisor
    )
  end
end
defmodule MyApp.DatabaseSupervisor do
  @moduledoc """
  Second-level supervisor that owns three `MyApp.Connection` processes.
  """

  use Supervisor

  @doc "Starts the supervisor under the module name; the argument is ignored."
  def start_link(_) do
    Supervisor.start_link(__MODULE__, [], name: __MODULE__)
  end

  @impl true
  def init(_) do
    # `{MyApp.Connection, id: :conn1}` only passes `[id: :conn1]` to the
    # connection as its start argument; the child id would still be
    # MyApp.Connection for all three, and a supervisor refuses to start with
    # duplicate child ids. The id is therefore also set on the child spec.
    children =
      for conn_id <- [:conn1, :conn2, :conn3] do
        Supervisor.child_spec({MyApp.Connection, id: conn_id}, id: conn_id)
      end

    Supervisor.init(children, strategy: :one_for_one)
  end
end
Next Steps
Continue to 11-ecto-databases.md to learn about database access with Ecto - schemas, queries, changesets, and migrations.