Lean 4: Provably Correct Code

import Mathlib

-- =========================================================================
-- PROVABLY CORRECT CODE IN LEAN 4: MAJOR PARADIGMS
-- =========================================================================
-- This file demonstrates the different ways to write provably correct code
-- in dependent type theory, specifically Lean 4.

-- =========================================================================
-- 1. INTRINSIC TYPING (Subtypes) & PROGRAM EXTRACTION
-- =========================================================================
--
-- In this approach, we bundle the data and the proofs about that data
-- into a single type using a `Subtype` (`{ x // p x }`).
--
-- --- PROGRAM EXTRACTION (Constructive Proofs as Programs) ---
-- You write a theorem stating `∀ L, L ≠ [] → ∃ x, x ∈ L ∧ ∀ y ∈ L, y ≤ x`.
-- You then prove this theorem using tactics. Because Lean is based on constructive
-- logic, a proof of "exists x" MUST contain a mechanical way to compute `x`.
-- You can theoretically extract the executable code directly out of the proof script!
--
-- In older dependent type theory languages (like Coq), you write a theorem
-- proving `∃ x, P x` using tactics, and then "extract" the function from the proof.
--
-- In Lean 4, `Prop` (like `∃`) is COMPLETELY erased at runtime. You cannot
-- extract executable code from a purely logical proposition!
--
-- This is exactly WHY `find_max_1` uses `Subtype` (`{ x // p x }`)
-- instead of `Exists` (`∃ x, p x`). `Subtype` lives in `Type`, meaning the data
-- part (`x`) is kept at runtime, while the proof part is erased.

/--
Finds the maximal element in a list of natural numbers.
Returns a Subtype: the element `x`, paired with a proof that `x` is in `L`
and a proof that `x` is greater than or equal to all `y` in `L`.
-/
def find_max_1 : (L : List ℕ) → L ≠ [] → { x // x ∈ L ∧ ∀ y ∈ L, y ≤ x }
  -- Base Case 1: The list is empty.
  -- This contradicts the required proof `h : L ≠ []`, so we dismiss it.
  | [], h => by contradiction

  -- Base Case 2: The list has exactly one element `a`.
  | [a], _ => ⟨a, by
      constructor
      · -- Prove `a ∈ [a]`
        simp
      · -- Prove ∀ y ∈ [a], y ≤ a
        intro y hy
        simp only [List.mem_singleton] at hy
        omega⟩

  -- Inductive Step: The list has at least two elements (`a`, `b`, and a `tail`).
  | a :: b :: tail, _ =>
      -- Recursively find the maximum of the tail `b :: tail`.
      -- hm_mem, hm_max are the proofs that `m` is in the tail
      -- and that `m` is greater than or equal to all elements in the tail, respectively.
      -- They come from "deconstructing" the conjunction in the return type of `find_max_1`.
      let ⟨m, ⟨hm_mem, hm_max⟩⟩ := find_max_1 (b :: tail) (by simp)

      -- Now we have `m`, the maximum of the tail. We compare it to `a`.
      if h : m ≤ a then
        -- If m ≤ a, then 'a' is our new maximum.
        ⟨a, by
          constructor
          · -- Prove `a` is in `a :: b :: tail`
            simp
          · -- Prove ∀ y ∈ a :: b :: tail, y ≤ a
            intro y hy
            -- Convert List.Mem to an Or statement exactly once
            rcases List.mem_cons.mp hy with rfl | hy_tail
            · -- Subcase: y is the head (y = a).
              rfl
            · -- Subcase: y is in the tail.
              have := hm_max y hy_tail
              omega⟩
      else
        -- If it is NOT the case that m ≤ a (meaning a < m), then 'm' remains the maximum.
        ⟨m, by
          constructor
          · -- Prove `m` is in `a :: b :: tail`
            simp [hm_mem]
          · -- Prove ∀ y ∈ a :: b :: tail, y ≤ m
            intro y hy
            -- Convert List.Mem to an Or statement exactly once
            rcases List.mem_cons.mp hy with rfl | hy_tail
            · -- Subcase: y is the head (y = a).
              omega
            · -- Subcase: y is in the tail.
              exact hm_max y hy_tail⟩

#check find_max_1
#print find_max_1

-- --- EVALUATING INTRINSICALLY TYPED FUNCTIONS ---
-- Evaluating `find_max_1 [3, 1, 4, 5, 1]` directly fails because it requires a second
-- argument: a proof that the list is not empty.
-- We can supply this proof using `(by decide)`:
#eval find_max_1 [3, 1, 4, 5, 1] (by decide)

-- --- WHY NOT JUST USE EXISTS (∃)? ---
-- If we tried to write this as a pure logical existence theorem (using `∃` instead of Subtype),
-- it compiles, but we can NEVER execute it to get an actual number. Lean erases `Prop` at runtime:

theorem find_max_unextractable (L : List ℕ) (h : L ≠ []) : ∃ x, x ∈ L ∧ ∀ y ∈ L, y ≤ x := by
  -- We can easily extract the value and proof from our intrinsically typed function:
  let ⟨x, hx⟩ := find_max_1 L h
  exact ⟨x, hx⟩

-- If you uncomment the line below, Lean will throw an error because it cannot `#eval` a Prop:
-- #eval find_max_unextractable [3, 1, 4] (by decide)


-- =========================================================================
-- 2. EXTRINSIC TYPING: Separating the computational function from its proofs
-- =========================================================================
-- Most common and often preferred way to write provably correct code in Lean.

-- Here is the purely computational function. It returns just a natural number `ℕ`.
-- Notice how much cleaner the logic is without the inline `by` proof blocks.
def find_max_2 : (L : List ℕ) → L ≠ [] → ℕ
  | [], h => by contradiction
  | [a], _ => a
  | a :: b :: tail, _ =>
      -- We must pass a proof that `b :: tail ≠ []` to the recursive call.
      -- Even in purely computational code, dependent typing means we sometimes
      -- use tactics inline to satisfy proof arguments. The `by simp` tactic
      -- automatically proves `b :: tail ≠ []`.
      --
      -- Alternatively, we could provide a pure term proof without tactics.
      -- `b :: tail ≠ []` means `(b :: tail = []) → False`.
      -- Since constructors of inductive types are disjoint, Lean generates
      -- a `noConfusion` theorem. Thus, we could write:
      -- let m := find_max_2 (b :: tail) List.noConfusion
      let m := find_max_2 (b :: tail) (by simp)
      if m ≤ a then a else m

-- Now, we write a SEPARATE theorem to specify what the function should do.
-- We state that the result of `find_max_2` is in the list, and is >= all elements.
--
-- HOW IS THE THEOREM TIED TO THE FUNCTION?
--
-- 1. Explicit Reference: The theorem's signature explicitly calls `find_max_2 L h`.
--    This physically binds the logical property to the output of that exact function.
--
-- 2. Independent Compilation: The compiler allows `find_max_2` to compile purely as
--    code. It doesn't "know" it's supposed to find the maximum until you write the theorem.
--
-- 3. Breaking Changes: If another developer later modifies `find_max_2`
--    (e.g., they accidentally change `m ≤ a` to `m > a`),
--    the `find_max_2` function will still compile perfectly fine. BUT, the `find_max_2_correct`
--    theorem will immediately break, preventing the project from passing verification.
--    Try it.
theorem find_max_2_correct : (L : List ℕ) → (h : L ≠ []) →
  find_max_2 L h ∈ L ∧ ∀ y ∈ L, y ≤ find_max_2 L h
| [], h => by contradiction
| [a], _ => by
    dsimp [find_max_2]
    constructor
    · simp
    · intro y hy
      simp only [List.mem_singleton] at hy
      omega
| a :: b :: tail, _ => by
    -- Structurally mimic the function's recursion
    have ⟨IH_mem, IH_max⟩ := find_max_2_correct (b :: tail) (by simp)
    dsimp [find_max_2]

    -- `split` automatically branches on the `if m ≤ a then ... else ...`
    split
    · -- Case 1: m ≤ a (so find_max_2 returns 'a')
      constructor
      · simp
      · intro y hy
        rcases List.mem_cons.mp hy with rfl | hy_tail
        · omega
        · have := IH_max y hy_tail
          omega
    · -- Case 2: ¬(m ≤ a) (so find_max_2 returns 'm')
      constructor
      · simp [IH_mem]
      · intro y hy
        rcases List.mem_cons.mp hy with rfl | hy_tail
        · omega
        · exact IH_max y hy_tail

#check find_max_2
#eval find_max_2 [3, 1, 4, 5, 1] (by decide)

-- =========================================================================
-- 3. DEPENDENT DATA STRUCTURES (Correct-by-Construction / Indexed Families)
-- =========================================================================
--
-- Instead of using a generic `List` and proving it's non-empty or sorted, you
-- define a custom data type that enforces invariants at the structural level.
--
-- Example: Instead of `List ℕ` and a proof `L ≠ []`, you use a `NonEmptyList ℕ`.
-- Or, for something more complex, a `MaxHeap` or `SearchTree` type where the
-- constructor itself forces you to pass a proof that the parent node is larger
-- than the child node. The property isn't bundled dynamically, it's locked into
-- the structure's physical shape.

inductive NonEmptyList (α : Type) where
  | singleton (a : α) : NonEmptyList α
  | cons (head : α) (tail : NonEmptyList α) : NonEmptyList α
  -- `deriving Repr` automatically generates the code needed to print this type in `#eval` and `#print`
  deriving Repr

-- Now `find_max_3` takes a structurally non-empty list.
-- We no longer need to pass a proof `h : L ≠ []` because the structure itself
-- guarantees there is at least one element. The compiler knows this, so
-- pattern matching is exhaustive without an empty case!

def find_max_3 : NonEmptyList ℕ → ℕ
  | NonEmptyList.singleton a => a
  | NonEmptyList.cons head tail =>
      let m := find_max_3 tail
      if m ≤ head then head else m

#check find_max_3
#print find_max_3

-- We can evaluate this directly. Notice we don't need a `(by decide)` proof anymore.
-- However, because `NonEmptyList` is our own custom data type, we do not get the
-- convenient `[3, 1, 4, 5, 1]` bracket syntax that Lean's standard `List` provides.
-- We must construct it manually using our `cons` and `singleton` constructors.
def my_list : NonEmptyList ℕ :=
  NonEmptyList.cons 3 (NonEmptyList.cons 1 (NonEmptyList.cons 4 (NonEmptyList.cons 5 (NonEmptyList.singleton 1))))

#eval find_max_3 my_list

-- To avoid this manual construction, we can write a helper function that securely
-- converts a standard `List` into our `NonEmptyList`, provided we have a proof
-- that the list is not empty!
def toNonEmptyList {α : Type} : (head : α) → (tail : List α) → NonEmptyList α
  | a, [] => NonEmptyList.singleton a
  | a, b :: tail => NonEmptyList.cons a (toNonEmptyList b tail)

def fromList {α : Type} : (L : List α) → L ≠ [] → NonEmptyList α
  | [], h => by contradiction
  | a :: tail, _ => toNonEmptyList a tail

-- Now we can construct a `NonEmptyList` much more ergonomically:
def my_list_2 : NonEmptyList ℕ := fromList [3, 1, 4, 5, 1] (by decide)

#eval find_max_3 my_list_2


-- =========================================================================
-- 4. MONADIC VERIFICATION (Hoare Logic / Imperative Style)
-- =========================================================================
--
-- For imperative, stateful programming (using `do` blocks), you can use verification
-- monads (like `Hoare` monads or frameworks like Iris/Aesop). You attach
-- Pre-conditions and Post-conditions to the state transitions.
--
-- Lean 4 supports imperative programming via `do` notation. For stateful loops,
-- you traditionally verify them using Hoare Triples (Pre/Post-conditions).
-- While standard Lean 4 core doesn't have an automated Hoare framework built-in,
-- here is what the imperative implementation looks like, annotated with invariants.

def find_max_4 (L : List ℕ) (h : L ≠ []) : ℕ := Id.run do
  -- We safely grab the head because of the proof `h`
  let mut current_max := L.head h

  -- {{ PRE-CONDITION: L ≠ [] }}
  -- {{ INVARIANT: current_max ∈ L ∧ ∀ y ∈ (elements processed so far), y ≤ current_max }}
  for x in L.tail do
    if current_max < x then
      current_max := x

  -- {{ POST-CONDITION: current_max ∈ L ∧ ∀ y ∈ L, y ≤ current_max }}
  return current_max

#eval find_max_4 [3, 1, 4, 5, 1] (by decide)