This is the beginning of a fourteen part series:
- Scheme in F#
- Just 'let' Me Be!
- Lambda the Ultimate!
- Rinse and Recurse
- What 'letrec' Can't Do
- What's Lisp Without Lists?!
- No Wait, Macro the Ultimate!
- Oh, The Humanity!
- Language vs. Library
- Turning Your Brain Inside Out With Continuations
- Playing Dice with the Universe
- Functional I/O (or at least "O")
- Functional I/O (including "I" this time)
- Historical Debugging
One of my New Year’s goals (2007) is to re-read Lisp in Small Pieces and implement all 11 interpreters and 2 compilers. As much as I like the "Lisp in Lisp" idea and enjoyed the eureka moment in SICP when Sussman writes the metacircular interpreter on the board to the music from Space Odyssey, I don't want to do Lisp in Lisp itself. Lisp in F# sounds like more fun.
As a warm-up, I’m going through Bill Hails’ absolutely awesome book where he builds “PScheme”; a Scheme interpreter in Perl. Of course, doing it in F#. Here’s v0.0.0. It turned out pretty small (about 100 lines):
I kind of hate these sort of state machine parsers. Later I’ll come back and redo this as parser combinators or lex/yacc, etc. but for now it’s simple enough (just strings, numbers and symbols):
open System
open System.Numerics
type Token =
| Open | Close
| Number of string
| String of string
| Symbol of string
let tokenize source =
let rec string acc = function
| '\\' :: '"' :: t -> string (acc + "\"") t // escaped quote becomes quote
| '"' :: t -> acc, t // closing quote terminates
| c :: t -> string (acc + (c.ToString())) t // otherwise accumulate chars
| _ -> failwith "Malformed string."
let rec token acc = function
| (')' :: _) as t -> acc, t // closing paren terminates
| w :: t when Char.IsWhiteSpace(w) -> acc, t // whitespace terminates
| [] -> acc, [] // end of list terminates
| c :: t -> token (acc + (c.ToString())) t // otherwise accumulate chars
let rec tokenize' acc = function
| w :: t when Char.IsWhiteSpace(w) -> tokenize' acc t // skip whitespace
| '(' :: t -> tokenize' (Open :: acc) t
| ')' :: t -> tokenize' (Close :: acc) t
| '"' :: t -> // start of string
let s, t' = string "" t
tokenize' (Token.String(s) :: acc) t'
| '-' :: d :: t when Char.IsDigit(d) -> // start of negative number
let n, t' = token ("-" + d.ToString()) t
tokenize' (Token.Number(n) :: acc) t'
| '+' :: d :: t | d :: t when Char.IsDigit(d) -> // start of positive number
let n, t' = token (d.ToString()) t
tokenize' (Token.Number(n) :: acc) t'
| s :: t -> // otherwise start of symbol
let s, t' = token (s.ToString()) t
tokenize' (Token.Symbol(s) :: acc) t'
| [] -> List.rev acc // end of list terminates
tokenize' [] source
The near-equivalence between syntax and semantics in Scheme makes the parser trivial. There’s almost a 1:1 correspondence between tokens and expressions. One exception is that the Open
/Close
tokens denote Expression lists:
type Expression =
| Number of BigInteger
| String of string
| Symbol of string
| List of Expression list
| Function of (Expression list -> Expression)
| Special of (Expression list -> Expression)
let parse source =
let map = function
| Token.Number(n) -> Expression.Number(BigInteger.Parse(n))
| Token.String(s) -> Expression.String(s)
| Token.Symbol(s) -> Expression.Symbol(s)
| _ -> failwith "Syntax error."
let rec parse' acc = function
| Open :: t –>
let e, t' = parse' [] t
parse' (List(e) :: acc) t'
| Close :: t -> (List.rev acc), t
| h :: t -> parse' ((map h) :: acc) t
| [] -> (List.rev acc), []
let result, _ = parse' [] (tokenize source)
result
For printing expressions to the console:
let rec print = function
| List(list) -> "(" + String.Join(" ", (List.map print list)) + ")"
| String(s) –>
let escape = String.collect (function '"' -> "\\\"" | c -> c.ToString()) // escape quotes
"\"" + (escape s) + "\""
| Symbol(s) –> s
| Number(n) -> n.ToString()
| Function(_) | Special(_) -> "Function"
We begin with just a few primitives (which are simply Expression –> Expression functions) for doing basic math and conditionals and to seed a global environment with them:
let Multiply args =
let prod a = function Number(b) -> a * b | _ -> failwith "Malformed multiplication argument."
Number(List.fold prod 1I args)
let Subtract = function
| [] -> Number(0I) // (-) == 0
| [Number(n)] -> Number(-n) // (- a) == –a
| Number(n) :: ns -> // (- a b c) == a - b – c
let sub a = function Number(b) -> a - b | _ -> failwith "Malformed subtraction argument."
Number(List.fold sub n ns)
| _ -> failwith "Malformed subtraction."
let rec If = function
| [condition; t; f] –>
match eval condition with
| List([]) | String("") -> eval f // empty list or empty string is false
| Number(n) when n = 0I -> eval f // zero is false
| _ -> eval t // everything else is true
| _ -> failwith "Malformed 'if'."
and environment =
Map.ofList [
"*", Function(Multiply)
"-", Function(Subtract)
"if", Special(If)]
The classic yin/yang of eval/apply are very easy to implement. Literals eval to themselves, symbols are looked up in the environment, and lists are applied. Special forms are, well, special in that they don’t eval their arguments up front; leaving it up to the callee (e.g. If
will eval one of two expressions depending on the conditional). This is important to allow short-circuiting.
Notice that If
and environment
above along with eval and apply below are all mutually recursive:
and eval expression =
match expression with
| Number(_) as lit –> lit
| String(_) as lit –> lit
| Symbol(s) -> environment.[s]
| List([]) –> List([])
| List(h :: t) –>
match eval h with
| Function(f) -> apply f t
| Special(f) -> f t
| _ -> failwith "Malformed expression."
| _ -> failwith "Malformed expression."
and apply fn args = fn (List.map eval args)
The REPL now just reads a line from the console, parses, evals and prints it, then rinses and repeats.
let rep = List.ofSeq >> parse >> List.head >> eval >> print
let rec repl output =
printf "%s\n> " output
try Console.ReadLine() |> rep |> repl
with ex -> repl ex.Message
The whole deal is kicked off with:
repl "Welcome to FScheme"
Gotta have tests:
let test () =
let case source expected =
try
let output = rep source
if output <> expected then
printf "TEST FAILED: %s (Expected: %s, Actual: %s)" source expected output
with _ -> printf "TEST CRASHED: %s" source
case "1" "1" // numbers
case "+1" "1" // explicit positive numbers
case "-1" "-1" // negative numbers
case "\"hello\"" "\"hello\"" // strings
case "(*)" "1" // multiplication
case "(* 2 3)" "6" // multiplication
case "(* 2 3 4)" "24" // multiplication
case "(-)" "0" // strange subtraction case
case "(- 10)" "-10" // negation
case "(- 10 2)" "8" // subtraction
case "(- 10 2 3)" "5" // subtraction
case "(if (* 0 1) 10 20)" "20" // if
case "(if (* 1 1) 10 20)" "10" // if
case "(if (* 1 1) 10 bomb)" "10" // if (special form)
case "(* 1234567890987654321 1234567890987654321)" "1524157877457704723228166437789971041" // bigint math