Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Verification #5

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,6 @@ agg-synthesis/utilities.py
agg-synthesis/test-old/*
agg-synthesis/s_sort_2.py
agg-synthesis/missing-agg

# Lean
*.lake
4 changes: 4 additions & 0 deletions lean-4/Aggregators.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- This module serves as the root of the `Project` library.
-- Import modules here that should be built as part of the library.
import Aggregators.Basic
import Aggregators.Utils
170 changes: 170 additions & 0 deletions lean-4/Aggregators/Basic.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
import Mathlib
import Mathlib.Data.List.Sort
import Lean
open Lean Elab Meta

/-
The wc command counts the number of characters in a file.
The wc_agg function is a simple aggregation function that sums the number of characters in two files.
-/

def wc_agg : Nat → Nat → Nat :=
λ x y ↦ x + y

-- wc xs + wc ys = wc ys + wc xs

theorem wc_correctness
(wc : String → Nat)
(h : ∀ xs ys, wc (xs ++ ys) = wc xs + wc ys) :
wc (xs ++ ys) = wc_agg (wc xs) (wc ys) :=
by
rw [wc_agg]
rw [h]

def pairwise_add : Nat × Nat → Nat × Nat → Nat × Nat
:= λ (x1, y1) (x2, y2) => (x1 + x2, y1 + y2)

def wc_agg' := pairwise_add

theorem wc_correctness' (wc : String → Nat × Nat)
(h : ∀ xs ys, wc (xs ++ ys) = pairwise_add (wc xs) (wc ys)) :
wc (xs ++ ys) = wc_agg' (wc xs) (wc ys) :=
by
rw [wc_agg']
rw [h]

/-
def wc : String → Nat :=

λ file ↦ file.length

def length : (@& String) → Nat
⟨s⟩ => s.length

def append : String → (@& String) → String
| ⟨a⟩, ⟨b⟩ => ⟨a ++ b⟩

theorem wc_correctness (xs ys : String) :
wc (xs ++ ys) = wc_agg (wc xs) (wc ys) :=
by
rw [wc_agg]
repeat rw [wc]
-- 1. simp [length]
-- 2. simp [append]
-- 3. simp [List.length_append]
apply String.length_append
-/

/-
The sort command sorts a list of strings.
The sort_agg function is a simple aggregation function that merges the two sorted lists.
-/

-- REFERENCE:
-- https://github.com/leanprover-community/mathlib4/blob/8666bd82efec40b8b3a5abca02dc9b24bbdf2652/Mathlib/Data/List/Sort.lean

def length : List α → Nat := List.length

@[simp]
def split : List α → List α × List α
| [] => ([], [])
| a :: l =>
let (l₁, l₂) := split l
(a :: l₂, l₁)

theorem split_cons_of_eq (a : α) {l l₁ l₂ : List α} (h : split l = (l₁, l₂)) :
split (a :: l) = (a :: l₂, l₁) := by rw [split, h]

theorem length_split_le :
∀ {l l₁ l₂ : List α}, split l = (l₁, l₂) → length l₁ ≤ length l ∧ length l₂ ≤ length l
| [], _, _, rfl => ⟨Nat.le_refl 0, Nat.le_refl 0⟩
| a :: l, l₁', l₂', h => by
cases' e : split l with l₁ l₂
injection (split_cons_of_eq _ e).symm.trans h; substs l₁' l₂'
cases' length_split_le e with h₁ h₂
exact ⟨Nat.succ_le_succ h₂, Nat.le_succ_of_le h₁⟩

theorem length_split_lt {a b} {l l₁ l₂ : List α} (h : split (a :: b :: l) = (l₁, l₂)) :
length l₁ < length (a :: b :: l) ∧ length l₂ < length (a :: b :: l) := by
cases' e : split l with l₁' l₂'
injection (split_cons_of_eq _ (split_cons_of_eq _ e)).symm.trans h; substs l₁ l₂
cases' length_split_le e with h₁ h₂
exact ⟨Nat.succ_le_succ (Nat.succ_le_succ h₁), Nat.succ_le_succ (Nat.succ_le_succ h₂)⟩

def merge {α : Type} (r : α → α → Prop) [DecidableRel r]
: List α → List α → List α
| [], l' => l'
| l, [] => l
| a :: l, b :: l' => if (r a b) then a :: merge r l (b :: l') else b :: merge r (a :: l) l'

def mergeSort (r : α → α → Prop) [DecidableRel r] : List α → List α
| [] => []
| [a] => [a]
| a :: b :: l => by
-- Porting note: rewrote to make `mergeSort_cons_cons` proof easier
let ls := (split (a :: b :: l))
have e : split (a :: b :: l) = ⟨ls.1, ls.2⟩ := rfl
have h := length_split_lt e
have := h.1
have := h.2
exact merge r (mergeSort r ls.1) (mergeSort r ls.2)
termination_by l => length l

-- This is mergeSort_cons_cons from the mathlib library

alias sort := mergeSort
alias sort_agg := merge

theorem sort_correctness {a b} {c xs ys : List α}
(r : α → α → Prop) [DecidableRel r] (h : split (a :: b :: c) = (xs, ys)) :
sort r (a :: b :: c) = sort_agg r (sort r xs) (sort r ys) := by

rw [sort, sort_agg]
simp only [mergeSort, h]

/-
The grep command searches for a pattern in a file.
The grep_agg function is a simple aggregation function that concatenates the results of two greps.
-/

def grep_agg : String → String → String :=
λ x y ↦ x ++ y

theorem grep_correctness (grep : String → Strinig → String)
(h : ∀ xs ys pattern, grep (xs ++ ys) pattern = grep xs pattern ++ grep ys pattern)
(xs ys : String) :
grep (xs ++ ys) pattern = grep_agg (grep xs pattern) (grep ys pattern) :=
by
rw [grep_agg]
rw [h]

/-
The uniq command removes duplicates from a list of strings.
The unique_agg function is a simple aggregation function that merges the two unique lists.
-/

def uniq {α : Type} [DecidableEq α] : List α → List α :=
λ xs ↦ xs.eraseDup

-- Run uniq again to remove duplicates from the concatenated list
def uniq_uniq {α : Type} [DecidableEq α] (xs ys: List α) :
uniq (xs ++ ys) = uniq (uniq xs ++ uniq ys) :=
by
repeat rw [uniq]
induction xs with
| nil =>
repeat rw [List.eraseDup]
simp
| cons x xs ih =>
-- simp [List.eraseDup]
-- rw [ih]
sorry

-- rw [h]
-- simp [List.eraseDup]
-- rw [ih]
-- simp

-- TODO: Specify the optimized uniq_agg function
-- def uniq_agg {α : Type} [DecidableEq α] : List α → List α → List α :=
-- sorry
76 changes: 76 additions & 0 deletions lean-4/Aggregators/Utils.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
-- TODO: Handle errors

/- IO.FS.Stream represents a POSIX stream
structure Stream where
flush : IO Unit
read : USize → IO ByteArray
write : ByteArray → IO Unit
getLine : IO String
putStr : String → IO Unit
-/

def bufsize : USize := 20 * 1024

def exitWithError {α : Type} (errMsg : String) : IO α := do
throw <| IO.userError errMsg

/-- Returns a stream for the given file, or none if the file does not exist.
The file handle tracks the underlying file descriptor.
When there are no references to its value, a finalizer closes the file descriptor.
It is given the same interface as a POSIX stream using IO.FS.Stream.ofHandle. -/
def getFileStream (filename : System.FilePath) : IO IO.FS.Stream := do
let fileExists ← filename.pathExists
if not fileExists then
exitWithError s!"File not found: {filename}"
else
let handle ← IO.FS.Handle.mk filename IO.FS.Mode.read
pure (IO.FS.Stream.ofHandle handle)

def getAllStreams (filenames : List System.FilePath) : IO (List IO.FS.Stream) := do
filenames.mapM getFileStream

/-- Reads from the stream and writes to stdout until the stream is empty. -/
partial def dump (stream : IO.FS.Stream) : IO Unit := do
let buf ← stream.read bufsize
if buf.isEmpty then
pure ()
else
let stdout ← IO.getStdout
stdout.write buf
dump stream

/-- Reads from the stream and returns a list of lines. -/
partial def readFile (stream : IO.FS.Stream) (buf : List String) : IO (List String) := do
let line ← stream.getLine
if line.isEmpty then
pure buf
else
let mut buf := buf.append [line]
readFile stream buf

/-- Reads from the list of streams and returns a list of all lines. -/
partial def readAll (streams : List IO.FS.Stream) (buf : List String) : IO (List String) := do
match streams with
| [] => pure buf
| stream :: rest =>
let buf ← readFile stream buf
readAll rest buf

structure Input where
key : Nat
input : String
deriving Repr

instance : ToString Input where
toString : Input → String
| ⟨_, input⟩ => input

def parseInput (lines : List String) : List Input :=
lines.map (fun line =>
let parts := line.splitOn " "
let key := parts.get! 0 |>.toNat?
match key with
| none => ⟨0, line⟩
| some n => ⟨n, line⟩
)

21 changes: 21 additions & 0 deletions lean-4/Main.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import Aggregators

/-- Aggregator for sort -n -/
def main (args : List String) : IO UInt32 := do
let args : List System.FilePath := List.map (fun arg ↦ ⟨arg⟩) args
let streams ← getAllStreams args

let output ← List.foldlM (fun acc stream => do
let lines ← readFile stream []
let inputs := parseInput lines
let acc := merge (fun a b => a.key <= b.key) acc inputs
pure acc)
[] streams

output.forM (fun output => IO.print output)
return 0

/- Tests
#eval main ["test.txt", "test2.txt"]
sort -n test.txt test2.txt > theirs.txt && .lake/build/bin/aggregatorstest.txt test2.txt > mine.txt && diff mine.txt theirs.txt
#eval merge (fun a b => a <= b) [1, 3, 5] [2, 4, 6] -/
5 changes: 5 additions & 0 deletions lean-4/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# lean-4

The `Aggregators` folder contains basic functions, their proofs, and utility functions.
The `Main.lean` file contains the `sort -n` aggregator.
Build the project with `lake build` and run the binary with `.lake/build/bin/Aggregators`.
85 changes: 85 additions & 0 deletions lean-4/lake-manifest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
{"version": "1.1.0",
"packagesDir": ".lake/packages",
"packages":
[{"url": "https://github.com/leanprover-community/batteries",
"type": "git",
"subDir": null,
"scope": "leanprover-community",
"rev": "9efd9c267ad7a71c5e3a83e8fbbd446fe61ef119",
"name": "batteries",
"manifestFile": "lake-manifest.json",
"inputRev": "main",
"inherited": true,
"configFile": "lakefile.lean"},
{"url": "https://github.com/leanprover-community/quote4",
"type": "git",
"subDir": null,
"scope": "leanprover-community",
"rev": "fa3d73a2cf077f4b14c7840352ac7b08aeb6eb41",
"name": "Qq",
"manifestFile": "lake-manifest.json",
"inputRev": "master",
"inherited": true,
"configFile": "lakefile.lean"},
{"url": "https://github.com/leanprover-community/aesop",
"type": "git",
"subDir": null,
"scope": "leanprover-community",
"rev": "b20a88676fd00affb90cbc9f1ff004ae588103b3",
"name": "aesop",
"manifestFile": "lake-manifest.json",
"inputRev": "master",
"inherited": true,
"configFile": "lakefile.toml"},
{"url": "https://github.com/leanprover-community/ProofWidgets4",
"type": "git",
"subDir": null,
"scope": "leanprover-community",
"rev": "cd20dae87c48495f0220663014dff11671597fcf",
"name": "proofwidgets",
"manifestFile": "lake-manifest.json",
"inputRev": "v0.0.43-pre",
"inherited": true,
"configFile": "lakefile.lean"},
{"url": "https://github.com/leanprover/lean4-cli",
"type": "git",
"subDir": null,
"scope": "leanprover",
"rev": "2cf1030dc2ae6b3632c84a09350b675ef3e347d0",
"name": "Cli",
"manifestFile": "lake-manifest.json",
"inputRev": "main",
"inherited": true,
"configFile": "lakefile.toml"},
{"url": "https://github.com/leanprover-community/import-graph",
"type": "git",
"subDir": null,
"scope": "leanprover-community",
"rev": "9b4088ccf0f44ddd7b1132bb1348aef8cf481e12",
"name": "importGraph",
"manifestFile": "lake-manifest.json",
"inputRev": "main",
"inherited": true,
"configFile": "lakefile.toml"},
{"url": "https://github.com/leanprover-community/LeanSearchClient",
"type": "git",
"subDir": null,
"scope": "leanprover-community",
"rev": "4b61d4abc1659f15ffda5ec24fdebc229d51d066",
"name": "LeanSearchClient",
"manifestFile": "lake-manifest.json",
"inputRev": "main",
"inherited": true,
"configFile": "lakefile.toml"},
{"url": "https://github.com/leanprover-community/mathlib4",
"type": "git",
"subDir": null,
"scope": "",
"rev": "27ca8188dc20fb61ac2f2878268ce5748a324a6c",
"name": "mathlib",
"manifestFile": "lake-manifest.json",
"inputRev": null,
"inherited": false,
"configFile": "lakefile.lean"}],
"name": "aggregators",
"lakeDir": ".lake"}
14 changes: 14 additions & 0 deletions lean-4/lakefile.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import Lake
open Lake DSL

package "aggregators" where
-- add package configuration options here

lean_lib «Aggregators» where
-- add library configuration options here

@[default_target]
lean_exe "aggregators" where
root := `Main

require mathlib from git "https://github.com/leanprover-community/mathlib4"
1 change: 1 addition & 0 deletions lean-4/lean-toolchain
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
leanprover/lean4:v4.13.0-rc3