OTP Basics

Master OTP (Open Telecom Platform) - the framework for building fault-tolerant, scalable applications with supervisors and applications.

What is OTP?

OTP is a set of libraries, design principles, and patterns for building robust systems:

  • Supervisors: Manage and restart failed processes
  • Applications: Package and configure your system
  • Behaviors: GenServer, Supervisor, Application, etc.
  • Error handling: "Let it crash" with supervision
  • Hot code swapping: Update running systems

Supervisors

Supervisors start their child processes, stay linked to them, and restart them when they exit, according to each child's restart setting and the supervisor's strategy.

Basic Supervisor

defmodule MyApp.Supervisor do
  @moduledoc """
  Static supervisor that runs one `MyApp.Worker` under the `:one_for_one` strategy.
  """

  use Supervisor

  @doc """
  Starts the supervisor and registers it locally under this module's name.

  `init_arg` is forwarded unchanged to `init/1`.
  """
  def start_link(init_arg),
    do: Supervisor.start_link(__MODULE__, init_arg, name: __MODULE__)

  @impl true
  def init(_init_arg) do
    worker_specs = [{MyApp.Worker, []}]

    Supervisor.init(worker_specs, strategy: :one_for_one)
  end
end

# Start supervisor
# The `{:ok, pid}` match raises if start_link/1 returns anything else.
{:ok, pid} = MyApp.Supervisor.start_link([])

Child Specification

# Using default child_spec
# Each entry is expanded by calling the module's child_spec/1.
children = [
  # Bare module: shorthand for {MyApp.Worker, []}
  MyApp.Worker,
  # The keyword list [name: :my_cache] is the argument given to child_spec/1
  {MyApp.Cache, name: :my_cache},
  # The argument can be any term, here a plain list
  {MyApp.Server, [:arg1, :arg2]}
]

# Custom child spec
# A map spells out every field instead of relying on child_spec/1.
children = [
  %{
    id: MyApp.Worker,
    # {module, function, args}: the child is started with MyApp.Worker.start_link([])
    start: {MyApp.Worker, :start_link, [[]]},
    restart: :permanent,
    # Shutdown timeout in milliseconds
    shutdown: 5000,
    type: :worker
  }
]

# Or using Supervisor.child_spec/2
# Overriding :id lets the same module appear more than once in one list.
children = [
  Supervisor.child_spec({MyApp.Worker, []}, id: :worker1),
  Supervisor.child_spec({MyApp.Worker, []}, id: :worker2)
]

Supervision Strategies

:one_for_one

If a child dies, only that child is restarted:

defmodule MyApp.Supervisor do
  @moduledoc """
  Supervises `Worker1`, `Worker2` and `Worker3` independently (`:one_for_one`).
  """

  use Supervisor

  @doc """
  Starts the supervisor, registered locally under this module's name.

  `init_arg` defaults to `[]`, so the zero-argument call keeps working, while
  the one-argument form lets this module be listed as a child of another
  supervisor (the `child_spec/1` generated by `use Supervisor` calls
  `start_link/1`).
  """
  def start_link(init_arg \\ []) do
    Supervisor.start_link(__MODULE__, init_arg, name: __MODULE__)
  end

  @impl true
  def init(_init_arg) do
    children = [
      {Worker1, []},
      {Worker2, []},
      {Worker3, []}
    ]

    # If Worker2 crashes, only Worker2 is restarted
    Supervisor.init(children, strategy: :one_for_one)
  end
end

:one_for_all

If any child dies, all children are terminated and restarted:

# Supervisor callback: the three services are treated as one unit.
def init(_) do
  coupled_services = [{Database, []}, {Cache, []}, {WebServer, []}]

  # A crash of any one of them (e.g. Database) restarts all three
  Supervisor.init(coupled_services, strategy: :one_for_all)
end

:rest_for_one

If a child dies, that child and all children started after it are restarted:

# Supervisor callback: children are listed in dependency order, so each one
# may rely on the children declared above it.
def init(_) do
  ordered_children = [
    # 1
    {Database, []},
    # 2 - depends on 1
    {Cache, []},
    # 3 - depends on 1 and 2
    {WebServer, []}
  ]

  # A Cache crash restarts Cache and WebServer;
  # a Database crash restarts all three.
  Supervisor.init(ordered_children, strategy: :rest_for_one)
end

Restart Options

# :restart is a field of the child specification, not of the argument handed
# to the worker. `{Worker, restart: :permanent}` would only pass
# `[restart: :permanent]` to the worker's start function and leave the restart
# mode at its default, so the field is set on the spec itself below.
# (Equivalent shorthand: Supervisor.child_spec(Worker, restart: :permanent).)

# :permanent - Always restart (default)
%{id: Worker, start: {Worker, :start_link, [[]]}, restart: :permanent}

# :temporary - Never restart
%{id: Worker, start: {Worker, :start_link, [[]]}, restart: :temporary}

# :transient - Restart only on abnormal termination
# (any exit reason other than :normal, :shutdown or {:shutdown, term})
%{id: Worker, start: {Worker, :start_link, [[]]}, restart: :transient}

Max Restarts

# Restart intensity: tolerate at most 3 restarts within a 5-second window
restart_limits = [strategy: :one_for_one, max_restarts: 3, max_seconds: 5]
Supervisor.init(children, restart_limits)

# Once that limit is exceeded the supervisor gives up: it stops its children
# and exits itself, leaving the decision to its own parent supervisor.

Dynamic Supervisors

Add/remove children at runtime:

defmodule MyApp.DynamicSupervisor do
  @moduledoc """
  Supervisor whose children are added and removed at runtime.
  """

  use DynamicSupervisor

  @doc """
  Starts the supervisor under this module's name; `init_arg` is passed to `init/1`.
  """
  def start_link(init_arg),
    do: DynamicSupervisor.start_link(__MODULE__, init_arg, name: __MODULE__)

  # No children are declared here; they are attached later through
  # DynamicSupervisor.start_child/2.
  @impl true
  def init(_init_arg), do: DynamicSupervisor.init(strategy: :one_for_one)
end

# Start a worker on demand under the running dynamic supervisor
worker_spec = {MyApp.Worker, arg}
{:ok, pid} = DynamicSupervisor.start_child(MyApp.DynamicSupervisor, worker_spec)

# Stop that worker again, addressing it by pid
DynamicSupervisor.terminate_child(MyApp.DynamicSupervisor, pid)

# Ask the supervisor for its child counts
DynamicSupervisor.count_children(MyApp.DynamicSupervisor)

Supervision Trees

Organize supervisors in a hierarchy:

defmodule MyApp.Application do
  @moduledoc """
  Application entry point: the root supervisor owns one sub-supervisor per
  subsystem (database, cache, web).
  """

  use Application

  def start(_type, _args) do
    # Database, cache and web supervisors, each started with an empty argument
    subsystems = [MyApp.DatabaseSupervisor, MyApp.CacheSupervisor, MyApp.WebSupervisor]
    children = Enum.map(subsystems, fn supervisor -> {supervisor, []} end)

    Supervisor.start_link(children, strategy: :one_for_one, name: MyApp.Supervisor)
  end
end

Nested Supervisors

defmodule MyApp.WebSupervisor do
  @moduledoc """
  Supervisor for the web layer: PubSub, the endpoint and presence tracking.
  """

  use Supervisor

  @doc """
  Starts the supervisor, registered locally under this module's name.

  Accepts an (ignored) argument so the module can be listed as
  `{MyApp.WebSupervisor, []}` in a parent supervisor: the `child_spec/1`
  generated by `use Supervisor` calls `start_link/1`. The argument defaults
  to `[]`, so calling `start_link/0` still works.
  """
  def start_link(init_arg \\ []) do
    Supervisor.start_link(__MODULE__, init_arg, name: __MODULE__)
  end

  @impl true
  def init(_init_arg) do
    children = [
      {Phoenix.PubSub, name: MyApp.PubSub},
      MyApp.Endpoint,
      {MyApp.Presence, []}
    ]

    Supervisor.init(children, strategy: :one_for_one)
  end
end

Applications

Package your code as an OTP application:

Application Module

defmodule MyApp.Application do
  @moduledoc """
  OTP application callback module: starts the top-level supervisor with the
  repo, cache, PubSub and endpoint as children.
  """

  use Application

  @impl true
  def start(_type, _args) do
    supervisor_opts = [strategy: :one_for_one, name: MyApp.Supervisor]

    Supervisor.start_link(
      [
        {MyApp.Repo, []},
        {MyApp.Cache, []},
        {Phoenix.PubSub, name: MyApp.PubSub},
        MyApp.Endpoint
      ],
      supervisor_opts
    )
  end

  # Hook for cleanup before the application stops; nothing to do here.
  @impl true
  def stop(_state), do: :ok
end

mix.exs Configuration

# Mix project callback: names the application callback module and the extra
# applications listed alongside it.
def application do
  callback = {MyApp.Application, []}
  extras = [:logger, :runtime_tools]

  [mod: callback, extra_applications: extras]
end

Application Dependencies

# Declares which other applications this one relies on and how they are handled
def application do
  callback = {MyApp.Application, []}

  [
    mod: callback,
    extra_applications: [:logger],
    # Don't auto-start
    included_applications: [:nested_app]
  ]
end

GenServer in OTP

GenServer is a behavior for building servers in supervision trees:

Full GenServer Example

defmodule MyApp.Counter do
  @moduledoc """
  Named GenServer holding a single integer counter.
  """

  use GenServer

  # Client API

  @doc """
  Starts the counter under this module's name.

  `opts` may contain `:initial_value`; it defaults to `0`.
  """
  def start_link(opts) do
    start_value = Keyword.get(opts, :initial_value, 0)
    GenServer.start_link(__MODULE__, start_value, name: __MODULE__)
  end

  @doc "Adds one to the counter and returns the new value."
  def increment, do: GenServer.call(__MODULE__, :increment)

  @doc "Returns the current value."
  def get, do: GenServer.call(__MODULE__, :get)

  # Server Callbacks

  @impl true
  def init(initial_value), do: {:ok, initial_value}

  @impl true
  def handle_call(:increment, _from, count) do
    incremented = count + 1
    {:reply, incremented, incremented}
  end

  def handle_call(:get, _from, count), do: {:reply, count, count}
end

Using in Supervision Tree

# The keyword list is the argument given to MyApp.Counter's child spec
children = [{MyApp.Counter, initial_value: 100}]

Supervisor.init(children, strategy: :one_for_one)

GenServer with Timeout

defmodule MyApp.Session do
  @moduledoc """
  Session process that stops itself when the GenServer timeout fires.
  """

  use GenServer

  # 1 minute, in milliseconds
  @idle_timeout 60_000

  # Returning a timeout as the third element makes GenServer deliver
  # `:timeout` to handle_info/2 if no message arrives within that time.
  def init(user_id) do
    session = %{user_id: user_id, last_activity: current_second()}
    {:ok, session, @idle_timeout}
  end

  # Each request records the activity time and re-arms the timeout.
  def handle_call(:get_user, _from, session) do
    refreshed = %{session | last_activity: current_second()}
    {:reply, session.user_id, refreshed, @idle_timeout}
  end

  # Session expired
  def handle_info(:timeout, session), do: {:stop, :normal, session}

  defp current_second, do: System.monotonic_time(:second)
end

Task.Supervisor

Supervise one-off tasks:

defmodule MyApp.Application do
  @moduledoc """
  Application callback module that starts `MyApp.TaskSupervisor`.
  """

  # `use Application` declares the behaviour and supplies the default `stop/1`
  # callback, which OTP invokes when the application is stopped.
  use Application

  @impl true
  def start(_type, _args) do
    children = [
      {Task.Supervisor, name: MyApp.TaskSupervisor}
    ]

    Supervisor.start_link(children, strategy: :one_for_one)
  end
end

# Use it
# Fire-and-forget: the work runs under MyApp.TaskSupervisor, no result is awaited
Task.Supervisor.start_child(MyApp.TaskSupervisor, &process_data/0)

# Async task with supervisor: the caller awaits the result
task = Task.Supervisor.async(MyApp.TaskSupervisor, &fetch_data/0)

result = Task.await(task)

Registry

Name processes dynamically:

Basic Registry

defmodule MyApp.Application do
  @moduledoc """
  Application callback module that starts a unique-key `Registry` named
  `MyApp.Registry`.
  """

  # `use Application` declares the behaviour and supplies the default `stop/1`
  # callback, which OTP invokes when the application is stopped.
  use Application

  @impl true
  def start(_type, _args) do
    children = [
      {Registry, keys: :unique, name: MyApp.Registry}
    ]

    Supervisor.start_link(children, strategy: :one_for_one)
  end
end

# Register a process: the :via tuple makes Registry handle the name
worker_name = {:via, Registry, {MyApp.Registry, "worker1"}}
{:ok, pid} = GenServer.start_link(MyWorker, [], name: worker_name)

# Look up
[{pid, _}] = Registry.lookup(MyApp.Registry, "worker1")

# Send message via registry
Registry.dispatch(MyApp.Registry, "worker1", fn entries ->
  for {pid, _} <- entries do
    send(pid, :hello)
  end
end)

DynamicSupervisor with Registry

defmodule MyApp.WorkerSupervisor do
  @moduledoc """
  Dynamic supervisor that starts one `MyApp.Worker` per worker id on request.
  """

  use DynamicSupervisor

  @doc "Starts the supervisor, registered under this module's name."
  def start_link(init_arg),
    do: DynamicSupervisor.start_link(__MODULE__, init_arg, name: __MODULE__)

  @doc "Starts a `MyApp.Worker` for `worker_id` under this supervisor."
  def start_worker(worker_id),
    do: DynamicSupervisor.start_child(__MODULE__, {MyApp.Worker, worker_id: worker_id})

  def init(_), do: DynamicSupervisor.init(strategy: :one_for_one)
end

defmodule MyApp.Worker do
  @moduledoc """
  Worker process registered in `MyApp.Registry` under its worker id.
  """

  use GenServer

  @doc """
  Starts a worker. `opts` must contain `:worker_id`, which is used both as
  the init argument and as the registry key.
  """
  def start_link(opts) do
    worker_id = Keyword.fetch!(opts, :worker_id)
    registered_name = {:via, Registry, {MyApp.Registry, worker_id}}

    GenServer.start_link(__MODULE__, worker_id, name: registered_name)
  end

  # ... callbacks
end

Practical Examples

Connection Pool

defmodule MyApp.ConnectionPool do
  @moduledoc """
  Fixed-size pool: a supervisor running `pool_size` `MyApp.Connection`
  processes, with random checkout.
  """

  use Supervisor

  @doc "Starts the pool with `pool_size` connections, registered under this module's name."
  def start_link(pool_size) do
    Supervisor.start_link(__MODULE__, pool_size, name: __MODULE__)
  end

  @impl true
  def init(pool_size) do
    # The explicit step keeps the range empty for pool_size 0; a plain
    # `1..0` counts down and would start two connections.
    children =
      for i <- 1..pool_size//1 do
        Supervisor.child_spec({MyApp.Connection, []}, id: {:connection, i})
      end

    Supervisor.init(children, strategy: :one_for_one)
  end

  @doc """
  Returns the pid of a randomly chosen running connection.

  Children that are currently restarting or not running are skipped, so the
  result is always a pid. Raises `Enum.EmptyError` when no connection is
  running.
  """
  def checkout do
    # which_children/1 reports :restarting or :undefined in the pid slot for
    # children that have no live process at the moment.
    running =
      for {_id, pid, _type, _modules} <- Supervisor.which_children(__MODULE__),
          is_pid(pid),
          do: pid

    Enum.random(running)
  end
end

Worker Pool with Poolboy

# mix.exs
# Project dependencies: poolboy supplies the worker pool
defp deps, do: [{:poolboy, "~> 1.5"}]

# Supervisor
# Pool settings, written as a keyword list (the same term as the 2-tuple form)
pool_config = [
  name: {:local, :worker_pool},
  worker_module: MyApp.Worker,
  size: 5,
  max_overflow: 10
]

children = [:poolboy.child_spec(:worker_pool, pool_config)]

# Use pool
# The worker pid is passed to the function for the duration of the transaction
:poolboy.transaction(:worker_pool, &GenServer.call(&1, {:work, data}))

Circuit Breaker

defmodule MyApp.CircuitBreaker do
  @moduledoc """
  GenServer-based circuit breaker.

  Functions passed to `call/1` run inside the server process. Each failure
  (raise, throw or exit) is counted; once the count reaches
  `:failure_threshold` the breaker opens and rejects calls with
  `{:error, :circuit_open}` until `:timeout` milliseconds have passed, after
  which the next call is let through as a half-open trial.
  """

  use GenServer

  defstruct [
    :failure_threshold,
    :timeout,
    failures: 0,
    state: :closed,
    opened_at: nil
  ]

  @doc """
  Starts the breaker under this module's name.

  Options: `:failure_threshold` (default 5) and `:timeout` in milliseconds
  (default 60_000).
  """
  def start_link(opts), do: GenServer.start_link(__MODULE__, opts, name: __MODULE__)

  @doc """
  Runs `fun` through the breaker.

  Returns `{:ok, result}`, `{:error, {kind, reason}}` when `fun` fails, or
  `{:error, :circuit_open}` while the breaker is open.
  """
  def call(fun), do: GenServer.call(__MODULE__, {:call, fun})

  def init(opts) do
    threshold = Keyword.get(opts, :failure_threshold, 5)
    cooldown = Keyword.get(opts, :timeout, 60_000)

    {:ok, %__MODULE__{failure_threshold: threshold, timeout: cooldown}}
  end

  # Open breaker: reject until the cool-down has elapsed, then try half-open.
  def handle_call({:call, fun}, _from, %{state: :open, opened_at: opened_at} = breaker) do
    elapsed = System.monotonic_time(:millisecond) - opened_at

    cond do
      elapsed > breaker.timeout -> execute_call(fun, %{breaker | state: :half_open})
      true -> {:reply, {:error, :circuit_open}, breaker}
    end
  end

  def handle_call({:call, fun}, _from, breaker), do: execute_call(fun, breaker)

  # Runs the function; success closes the breaker and clears the failure
  # count, any raise/throw/exit is recorded as a failure.
  defp execute_call(fun, breaker) do
    value = fun.()
    {:reply, {:ok, value}, %{breaker | failures: 0, state: :closed}}
  catch
    kind, reason ->
      {:reply, {:error, {kind, reason}}, register_failure(breaker)}
  end

  # Increments the failure count and opens the breaker once the threshold is hit.
  defp register_failure(breaker) do
    failure_count = breaker.failures + 1

    if failure_count >= breaker.failure_threshold do
      %{
        breaker
        | failures: failure_count,
          state: :open,
          opened_at: System.monotonic_time(:millisecond)
      }
    else
      %{breaker | failures: failure_count}
    end
  end
end

Graceful Shutdown

defmodule MyApp.Worker do
  @moduledoc """
  GenServer that traps exits so `terminate/2` can run cleanup on shutdown.
  """

  use GenServer

  @doc "Starts the worker, registered under this module's name."
  def start_link(opts) do
    GenServer.start_link(__MODULE__, opts, name: __MODULE__)
  end

  @impl true
  def init(_opts) do
    # Trapping exits is what allows terminate/2 to be invoked when the
    # supervisor asks this process to shut down.
    Process.flag(:trap_exit, true)
    {:ok, %{}}
  end

  @impl true
  def terminate(reason, state) do
    IO.puts("Shutting down: #{inspect(reason)}")
    # Cleanup: close connections, flush buffers, etc.
    cleanup(state)
    :ok
  end

  # Placeholder for the real cleanup work. The argument is unused for now,
  # hence the underscore (avoids an unused-variable compiler warning).
  defp cleanup(_state) do
    # Flush pending work
    # Close file handles
    # Disconnect from services
    :ok
  end
end

Hot Code Reloading

defmodule MyApp.Server do
  @moduledoc """
  Example of the `code_change/3` callback used to migrate state on upgrade.
  """

  use GenServer

  # Migrate state between versions
  def code_change(_old_vsn, state, _extra), do: {:ok, transform_state(state)}

  # Transform state structure for new version (currently returns it unchanged)
  defp transform_state(old_state), do: old_state
end

Application Configuration

config/config.exs

# Brings the config macros used below into scope.
import Config

# Application-wide settings for :myapp.
config :myapp,
  ecto_repos: [MyApp.Repo]

# Settings stored under the MyApp.Repo key of the :myapp application.
# NOTE(review): credentials are hard-coded here; presumably development-only
# values. Confirm, and load real secrets at runtime instead.
config :myapp, MyApp.Repo,
  database: "myapp_dev",
  username: "postgres",
  password: "postgres",
  hostname: "localhost"

# Loads the file named after the current environment (e.g. dev.exs). Keep this
# as the last statement so the environment file can override the values above.
import_config "#{config_env()}.exs"

Reading Configuration

# In application start
# Reads the repo settings from the application environment and passes them
# to the repo child.
def start(_type, _args) do
  # fetch_env!/2 raises a descriptive error when the key is missing, instead
  # of silently handing `nil` to the repo as get_env/2 would.
  db_config = Application.fetch_env!(:myapp, MyApp.Repo)

  children = [
    {MyApp.Repo, db_config}
  ]

  Supervisor.start_link(children, strategy: :one_for_one)
end

Exercises

  1. Create a supervisor with 3 workers using :one_for_one strategy

  2. Build a DynamicSupervisor that starts/stops workers by ID

  3. Implement a GenServer-based rate limiter that resets every minute

  4. Create a simple connection pool supervisor

  5. Build a supervision tree with multiple levels (root -> database supervisor -> connections)

# Solutions

# 1. Basic supervisor
defmodule MyApp.Supervisor do
  @moduledoc """
  Supervisor running three `MyApp.Worker` children under `:one_for_one`.
  """

  use Supervisor

  @doc """
  Starts the supervisor, registered under this module's name.

  The argument is optional (defaults to `[]`) so the module also works as a
  child of another supervisor, which calls `start_link/1`.
  """
  def start_link(init_arg \\ []) do
    Supervisor.start_link(__MODULE__, init_arg, name: __MODULE__)
  end

  @impl true
  def init(_init_arg) do
    # `{MyApp.Worker, id: n}` only passes `[id: n]` to the worker; the child
    # spec id would still be MyApp.Worker for all three, and a supervisor
    # refuses to start with duplicate ids. Give every spec its own id.
    children =
      for n <- [1, 2, 3] do
        Supervisor.child_spec({MyApp.Worker, id: n}, id: {MyApp.Worker, n})
      end

    Supervisor.init(children, strategy: :one_for_one)
  end
end

# 2. DynamicSupervisor with worker management
defmodule MyApp.DynamicWorkerSupervisor do
  @moduledoc """
  Dynamic supervisor that starts `MyApp.Worker` processes by id and stops
  them by pid.
  """

  use DynamicSupervisor

  @doc """
  Starts the supervisor, registered under this module's name.

  The argument is optional (defaults to `[]`) so the module can also be
  listed in a supervision tree, where `start_link/1` is called.
  """
  def start_link(init_arg \\ []) do
    DynamicSupervisor.start_link(__MODULE__, init_arg, name: __MODULE__)
  end

  @doc "Starts a worker for `id`; returns the result of `DynamicSupervisor.start_child/2`."
  def start_worker(id) do
    spec = {MyApp.Worker, id: id}
    DynamicSupervisor.start_child(__MODULE__, spec)
  end

  @doc "Stops the worker running as `pid`; returns `:ok` or `{:error, :not_found}`."
  def stop_worker(pid) do
    DynamicSupervisor.terminate_child(__MODULE__, pid)
  end

  @impl true
  def init(_init_arg) do
    DynamicSupervisor.init(strategy: :one_for_one)
  end
end

# 3. Rate limiter with reset
defmodule MyApp.RateLimiter do
  @moduledoc """
  Fixed-window rate limiter: allows up to `max_requests` checks per window
  and clears the count every 60 seconds.
  """

  use GenServer

  @doc "Starts the limiter under this module's name with the given request cap."
  def start_link(max_requests),
    do: GenServer.start_link(__MODULE__, max_requests, name: __MODULE__)

  @doc "Returns `:ok` while under the cap, `:rate_limited` otherwise."
  def check, do: GenServer.call(__MODULE__, :check)

  def init(max_requests) do
    schedule_reset()
    {:ok, %{max: max_requests, count: 0}}
  end

  def handle_call(:check, _from, window) do
    case window.count < window.max do
      true -> {:reply, :ok, %{window | count: window.count + 1}}
      false -> {:reply, :rate_limited, window}
    end
  end

  # Window rollover: arm the next reset, then clear the count.
  def handle_info(:reset, window) do
    schedule_reset()
    {:noreply, %{window | count: 0}}
  end

  defp schedule_reset, do: Process.send_after(self(), :reset, 60_000)
end

# 4. Connection pool
defmodule MyApp.PoolSupervisor do
  @moduledoc """
  Supervisor running `size` `MyApp.Connection` processes under `:one_for_one`.
  """

  use Supervisor

  @doc "Starts the pool supervisor with `size` connections, registered under this module's name."
  def start_link(size) do
    Supervisor.start_link(__MODULE__, size, name: __MODULE__)
  end

  @impl true
  def init(size) do
    # The explicit step keeps the range empty for size 0; a plain `1..0`
    # counts down and would start two connections.
    children =
      for i <- 1..size//1 do
        Supervisor.child_spec({MyApp.Connection, []}, id: {:conn, i})
      end

    Supervisor.init(children, strategy: :one_for_one)
  end
end

# 5. Multi-level supervision tree
defmodule MyApp.Application do
  @moduledoc """
  Root of the tree: the application supervisor owns `MyApp.DatabaseSupervisor`.
  """

  use Application

  def start(_type, _args) do
    root_opts = [strategy: :one_for_one, name: MyApp.RootSupervisor]

    Supervisor.start_link([{MyApp.DatabaseSupervisor, []}], root_opts)
  end
end

defmodule MyApp.DatabaseSupervisor do
  @moduledoc """
  Second level of the tree: supervises three `MyApp.Connection` processes.
  """

  use Supervisor

  @doc "Starts the supervisor, registered under this module's name; the argument is ignored."
  def start_link(_) do
    Supervisor.start_link(__MODULE__, [], name: __MODULE__)
  end

  @impl true
  def init(_) do
    # `{MyApp.Connection, id: :conn1}` only passes `[id: :conn1]` to the
    # connection; the child spec id would be MyApp.Connection three times,
    # and a supervisor refuses to start with duplicate ids. Reuse the
    # connection id as the child spec id.
    children =
      for conn_id <- [:conn1, :conn2, :conn3] do
        Supervisor.child_spec({MyApp.Connection, id: conn_id}, id: conn_id)
      end

    Supervisor.init(children, strategy: :one_for_one)
  end
end

Next Steps

Continue to 11-ecto-databases.md to learn about database access with Ecto - schemas, queries, changesets, and migrations.