binpash · RobertoGonzalesMatos · Oct 3, 2024 · Oct 4, 2024 · Oct 16, 2024
diff --git a/.gitignore b/.gitignore
@@ -66,3 +66,6 @@ agg-synthesis/utilities.py
 agg-synthesis/test-old/*
 agg-synthesis/s_sort_2.py
 agg-synthesis/missing-agg
+
+# Lean
+*.lake
diff --git a/lean-4/Aggregators.lean b/lean-4/Aggregators.lean
@@ -0,0 +1,4 @@
+-- This module serves as the root of the `Project` library.
+-- Import modules here that should be built as part of the library.
+import Aggregators.Basic
+import Aggregators.Utils
diff --git a/lean-4/Aggregators/Basic.lean b/lean-4/Aggregators/Basic.lean
@@ -0,0 +1,170 @@
+import Mathlib
+import Mathlib.Data.List.Sort 
+import Lean
+open Lean Elab Meta
+
+/- 
+  The wc command counts the number of characters in a file.
+  The wc_agg function is a simple aggregation function that sums the number of characters in two files.
+-/
+
+def wc_agg : Nat → Nat → Nat :=
+  λ x y ↦ x + y
+
+-- wc xs + wc ys = wc ys + wc xs
+
+theorem wc_correctness 
+  (wc : String → Nat)
+  (h : ∀ xs ys, wc (xs ++ ys) = wc xs + wc ys) : 
+  wc (xs ++ ys) = wc_agg (wc xs) (wc ys) :=
+  by 
+    rw [wc_agg]
+    rw [h]
+
+def pairwise_add : Nat × Nat → Nat × Nat → Nat × Nat
+  := λ (x1, y1) (x2, y2) => (x1 + x2, y1 + y2)
+
+def wc_agg' := pairwise_add
+
+theorem wc_correctness' (wc : String → Nat × Nat)
+  (h : ∀ xs ys, wc (xs ++ ys) = pairwise_add (wc xs) (wc ys)) : 
+  wc (xs ++ ys) = wc_agg' (wc xs) (wc ys) :=
+  by
+    rw [wc_agg']
+    rw [h]
+
+/-
+def wc : String → Nat :=
+
+  λ file ↦ file.length
+
+def length : (@& String) → Nat
+  ⟨s⟩ => s.length
+
+def append : String → (@& String) → String
+  | ⟨a⟩, ⟨b⟩ => ⟨a ++ b⟩
+
+theorem wc_correctness (xs ys : String) : 
+wc (xs ++ ys) = wc_agg (wc xs) (wc ys) := 
+  by 
+    rw [wc_agg]
+    repeat rw [wc]
+    -- 1. simp [length]
+    -- 2. simp [append]
+    -- 3. simp [List.length_append]
+    apply String.length_append
+-/
+
+/-
+  The sort command sorts a list of strings.
+  The sort_agg function is a simple aggregation function that merges the two sorted lists.
+-/
+
+-- REFERENCE: 
+-- https://github.com/leanprover-community/mathlib4/blob/8666bd82efec40b8b3a5abca02dc9b24bbdf2652/Mathlib/Data/List/Sort.lean
+
+def length : List α → Nat := List.length
+
+@[simp]
+def split : List α → List α × List α
+  | [] => ([], [])
+  | a :: l =>
+    let (l₁, l₂) := split l
+    (a :: l₂, l₁)
+
+theorem split_cons_of_eq (a : α) {l l₁ l₂ : List α} (h : split l = (l₁, l₂)) :
+    split (a :: l) = (a :: l₂, l₁) := by rw [split, h]
+
+theorem length_split_le :
+    ∀ {l l₁ l₂ : List α}, split l = (l₁, l₂) → length l₁ ≤ length l ∧ length l₂ ≤ length l
+  | [], _, _, rfl => ⟨Nat.le_refl 0, Nat.le_refl 0⟩
+  | a :: l, l₁', l₂', h => by
+    cases' e : split l with l₁ l₂
+    injection (split_cons_of_eq _ e).symm.trans h; substs l₁' l₂'
+    cases' length_split_le e with h₁ h₂
+    exact ⟨Nat.succ_le_succ h₂, Nat.le_succ_of_le h₁⟩
+
+theorem length_split_lt {a b} {l l₁ l₂ : List α} (h : split (a :: b :: l) = (l₁, l₂)) :
+    length l₁ < length (a :: b :: l) ∧ length l₂ < length (a :: b :: l) := by
+    cases' e : split l with l₁' l₂'
+    injection (split_cons_of_eq _ (split_cons_of_eq _ e)).symm.trans h; substs l₁ l₂
+    cases' length_split_le e with h₁ h₂
+    exact ⟨Nat.succ_le_succ (Nat.succ_le_succ h₁), Nat.succ_le_succ (Nat.succ_le_succ h₂)⟩
+
+def merge {α : Type} (r : α → α → Prop) [DecidableRel r] 
+  : List α → List α → List α
+  | [], l' => l'
+  | l, [] => l
+  | a :: l, b :: l' => if (r a b) then a :: merge r l (b :: l') else b :: merge r (a :: l) l'
+
+def mergeSort (r : α → α → Prop) [DecidableRel r] : List α → List α
+  | [] => []
+  | [a] => [a]
+  | a :: b :: l => by
+    -- Porting note: rewrote to make `mergeSort_cons_cons` proof easier
+    let ls := (split (a :: b :: l))
+    have e : split (a :: b :: l) = ⟨ls.1, ls.2⟩ := rfl
+    have h := length_split_lt e
+    have := h.1
+    have := h.2
+    exact merge r (mergeSort r ls.1) (mergeSort r ls.2)
+  termination_by l => length l
+
+-- This is mergeSort_cons_cons from the mathlib library
+
+alias sort := mergeSort 
+alias sort_agg := merge
+
+theorem sort_correctness {a b} {c xs ys : List α} 
+    (r : α → α → Prop) [DecidableRel r] (h : split (a :: b :: c) = (xs, ys)) :
+    sort r (a :: b :: c) = sort_agg r (sort r xs) (sort r ys) := by
+
+  rw [sort, sort_agg]
+  simp only [mergeSort, h]
+
+/-
+  The grep command searches for a pattern in a file.
+  The grep_agg function is a simple aggregation function that concatenates the results of two greps.
+-/
+
+def grep_agg : String → String → String :=
+  λ x y ↦ x ++ y
+
+theorem grep_correctness (grep : String → Strinig → String) 
+  (h : ∀ xs ys pattern, grep (xs ++ ys) pattern = grep xs pattern ++ grep ys pattern)
+  (xs ys : String) : 
+  grep (xs ++ ys) pattern = grep_agg (grep xs pattern) (grep ys pattern) :=
+  by
+    rw [grep_agg]
+    rw [h]
+
+/-
+  The uniq command removes duplicates from a list of strings.
+  The unique_agg function is a simple aggregation function that merges the two unique lists.
+-/
+
+def uniq {α : Type} [DecidableEq α] : List α → List α :=
+  λ xs ↦ xs.eraseDup
+
+-- Run uniq again to remove duplicates from the concatenated list
+def uniq_uniq {α : Type} [DecidableEq α] (xs ys: List α) : 
+  uniq (xs ++ ys) = uniq (uniq xs ++ uniq ys) :=
+  by
+    repeat rw [uniq]
+    induction xs with
+      | nil => 
+        repeat rw [List.eraseDup]
+        simp
+      | cons x xs ih =>
+        -- simp [List.eraseDup]
+        -- rw [ih]
+        sorry
+
+        -- rw [h]
+        -- simp [List.eraseDup]
+        -- rw [ih]
+        -- simp
+
+-- TODO: Specify the optimized uniq_agg function 
+-- def uniq_agg {α : Type} [DecidableEq α] : List α → List α → List α :=
+--   sorry
diff --git a/lean-4/Aggregators/Utils.lean b/lean-4/Aggregators/Utils.lean
@@ -0,0 +1,76 @@
+-- TODO: Handle errors
+
+/- IO.FS.Stream represents a POSIX stream
+  structure Stream where
+    flush   : IO Unit
+    read    : USize → IO ByteArray
+    write   : ByteArray → IO Unit
+    getLine : IO String
+    putStr  : String → IO Unit
+-/
+
+def bufsize : USize := 20 * 1024
+
+def exitWithError {α : Type} (errMsg : String) : IO α := do
+  throw <| IO.userError errMsg
+
+/-- Returns a stream for the given file, or none if the file does not exist.
+    The file handle tracks the underlying file descriptor. 
+    When there are no references to its value, a finalizer closes the file descriptor.
+    It is given the same interface as a POSIX stream using IO.FS.Stream.ofHandle. -/
+def getFileStream (filename : System.FilePath) : IO IO.FS.Stream := do
+  let fileExists ← filename.pathExists
+  if not fileExists then 
+    exitWithError s!"File not found: {filename}"
+  else 
+    let handle ← IO.FS.Handle.mk filename IO.FS.Mode.read
+    pure (IO.FS.Stream.ofHandle handle)
+
+def getAllStreams (filenames : List System.FilePath) : IO (List IO.FS.Stream) := do
+  filenames.mapM getFileStream
+
+/-- Reads from the stream and writes to stdout until the stream is empty. -/
+partial def dump (stream : IO.FS.Stream) : IO Unit := do
+  let buf ← stream.read bufsize
+  if buf.isEmpty then
+    pure ()
+  else
+    let stdout ← IO.getStdout
+    stdout.write buf
+    dump stream
+
+/-- Reads from the stream and returns a list of lines. -/
+partial def readFile (stream : IO.FS.Stream) (buf : List String) : IO (List String) := do
+  let line ← stream.getLine
+  if line.isEmpty then
+    pure buf
+  else
+    let mut buf := buf.append [line]
+    readFile stream buf
+
+/-- Reads from the list of streams and returns a list of all lines. -/
+partial def readAll (streams : List IO.FS.Stream) (buf : List String) : IO (List String) := do
+  match streams with
+  | [] => pure buf
+  | stream :: rest => 
+    let buf ← readFile stream buf
+    readAll rest buf
+
+structure Input where
+  key   : Nat
+  input : String
+  deriving Repr 
+
+instance : ToString Input where 
+  toString : Input → String
+  | ⟨_, input⟩ => input 
+
+def parseInput (lines : List String) : List Input :=
+  lines.map (fun line => 
+    let parts := line.splitOn " "
+    let key := parts.get! 0 |>.toNat?
+    match key with 
+    | none => ⟨0, line⟩
+    | some n => ⟨n, line⟩
+  )
+
diff --git a/lean-4/Main.lean b/lean-4/Main.lean
@@ -0,0 +1,21 @@
+import Aggregators
+
+/-- Aggregator for sort -n -/
+def main (args : List String) : IO UInt32 := do
+  let args : List System.FilePath := List.map (fun arg ↦ ⟨arg⟩) args
+  let streams ← getAllStreams args
+
+  let output ← List.foldlM (fun acc stream => do
+    let lines ← readFile stream []
+    let inputs := parseInput lines
+    let acc := merge (fun a b => a.key <= b.key) acc inputs
+    pure acc) 
+    [] streams
+
+  output.forM (fun output => IO.print output)
+  return 0
+
+/- Tests
+#eval main ["test.txt", "test2.txt"]
+sort -n test.txt test2.txt > theirs.txt && .lake/build/bin/aggregatorstest.txt test2.txt > mine.txt && diff mine.txt theirs.txt
+#eval merge (fun a b => a <= b) [1, 3, 5] [2, 4, 6] -/
diff --git a/lean-4/README.md b/lean-4/README.md
@@ -0,0 +1,5 @@
+# lean-4
+
+The `Aggregators` folder contains basic functions, their proofs, and utility functions.
+The `Main.lean` file contains the `sort -n` aggregator. 
+Build the project with `lake build` and run the binary with `.lake/build/bin/Aggregators`. 
diff --git a/lean-4/lake-manifest.json b/lean-4/lake-manifest.json
@@ -0,0 +1,85 @@
+{"version": "1.1.0",
+ "packagesDir": ".lake/packages",
+ "packages":
+ [{"url": "https://github.com/leanprover-community/batteries",
+   "type": "git",
+   "subDir": null,
+   "scope": "leanprover-community",
+   "rev": "9efd9c267ad7a71c5e3a83e8fbbd446fe61ef119",
+   "name": "batteries",
+   "manifestFile": "lake-manifest.json",
+   "inputRev": "main",
+   "inherited": true,
+   "configFile": "lakefile.lean"},
+  {"url": "https://github.com/leanprover-community/quote4",
+   "type": "git",
+   "subDir": null,
+   "scope": "leanprover-community",
+   "rev": "fa3d73a2cf077f4b14c7840352ac7b08aeb6eb41",
+   "name": "Qq",
+   "manifestFile": "lake-manifest.json",
+   "inputRev": "master",
+   "inherited": true,
+   "configFile": "lakefile.lean"},
+  {"url": "https://github.com/leanprover-community/aesop",
+   "type": "git",
+   "subDir": null,
+   "scope": "leanprover-community",
+   "rev": "b20a88676fd00affb90cbc9f1ff004ae588103b3",
+   "name": "aesop",
+   "manifestFile": "lake-manifest.json",
+   "inputRev": "master",
+   "inherited": true,
+   "configFile": "lakefile.toml"},
+  {"url": "https://github.com/leanprover-community/ProofWidgets4",
+   "type": "git",
+   "subDir": null,
+   "scope": "leanprover-community",
+   "rev": "cd20dae87c48495f0220663014dff11671597fcf",
+   "name": "proofwidgets",
+   "manifestFile": "lake-manifest.json",
+   "inputRev": "v0.0.43-pre",
+   "inherited": true,
+   "configFile": "lakefile.lean"},
+  {"url": "https://github.com/leanprover/lean4-cli",
+   "type": "git",
+   "subDir": null,
+   "scope": "leanprover",
+   "rev": "2cf1030dc2ae6b3632c84a09350b675ef3e347d0",
+   "name": "Cli",
+   "manifestFile": "lake-manifest.json",
+   "inputRev": "main",
+   "inherited": true,
+   "configFile": "lakefile.toml"},
+  {"url": "https://github.com/leanprover-community/import-graph",
+   "type": "git",
+   "subDir": null,
+   "scope": "leanprover-community",
+   "rev": "9b4088ccf0f44ddd7b1132bb1348aef8cf481e12",
+   "name": "importGraph",
+   "manifestFile": "lake-manifest.json",
+   "inputRev": "main",
+   "inherited": true,
+   "configFile": "lakefile.toml"},
+  {"url": "https://github.com/leanprover-community/LeanSearchClient",
+   "type": "git",
+   "subDir": null,
+   "scope": "leanprover-community",
+   "rev": "4b61d4abc1659f15ffda5ec24fdebc229d51d066",
+   "name": "LeanSearchClient",
+   "manifestFile": "lake-manifest.json",
+   "inputRev": "main",
+   "inherited": true,
+   "configFile": "lakefile.toml"},
+  {"url": "https://github.com/leanprover-community/mathlib4",
+   "type": "git",
+   "subDir": null,
+   "scope": "",
+   "rev": "27ca8188dc20fb61ac2f2878268ce5748a324a6c",
+   "name": "mathlib",
+   "manifestFile": "lake-manifest.json",
+   "inputRev": null,
+   "inherited": false,
+   "configFile": "lakefile.lean"}],
+ "name": "aggregators",
+ "lakeDir": ".lake"}
diff --git a/lean-4/lakefile.lean b/lean-4/lakefile.lean
@@ -0,0 +1,14 @@
+import Lake
+open Lake DSL
+
+package "aggregators" where
+  -- add package configuration options here
+
+lean_lib «Aggregators» where
+  -- add library configuration options here
+
+@[default_target]
+lean_exe "aggregators" where
+  root := `Main
+
+require mathlib from git "https://github.com/leanprover-community/mathlib4"
diff --git a/lean-4/lean-toolchain b/lean-4/lean-toolchain
@@ -0,0 +1 @@
+leanprover/lean4:v4.13.0-rc3