diff --git a/CHANGELOG.md b/CHANGELOG.md index f4dbfc433..c865d7abc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ and this project does not currently adhere to a particular versioning scheme. ### Added +- Add type system for Bend. ([#615][gh-615], [#679][gh-679], see [Type Checking](docs/type-checking.md)) - Add import system. ([#544][gh-544]) - Add multi line comment `#{ ... #}` syntax. ([#595][gh-595]) - Add error message when input file is not found. ([#513][gh-513]) diff --git a/cspell.json b/cspell.json index 4aa80d973..0ade9d2e0 100644 --- a/cspell.json +++ b/cspell.json @@ -34,6 +34,7 @@ "foldl", "hasher", "hexdigit", + "Hindley", "hvm's", "indexmap", "inet", @@ -57,6 +58,7 @@ "lnil", "lpthread", "mant", + "Milner", "miscompilation", "mult", "namegen", @@ -109,6 +111,7 @@ "succ", "supercombinator", "supercombinators", + "Tarjan's", "tlsv", "TSPL", "tunr", diff --git a/docs/syntax.md b/docs/syntax.md index 389a422c0..25d5d03c8 100644 --- a/docs/syntax.md +++ b/docs/syntax.md @@ -10,6 +10,10 @@ Click [here](#import-syntax) to see the import syntax. Click [here](#comments) to see the syntax for commenting code. +Click [here](#imp-type-syntax) to see the imperative type syntax. + +Click [here](#fun-type-syntax) to see the functional type syntax. + Both syntaxes can be mixed in the same file like the example below: ```python @@ -44,12 +48,15 @@ main = Defines a top level function. ```python -def add(x, y): +def add(x: u24, y: u24) -> u24: result = x + y return result +def unchecked two() -> u24: + return 2 + def main: - return add(40, 2) + return add(40, two) ``` A function definition is composed by a name, a sequence of parameters and a body. @@ -59,6 +66,10 @@ A top-level name can be anything matching the regex `[A-Za-z0-9_.-/]+`, except i The last statement of each function must either be a `return` or a selection statement (`if`, `switch`, `match`, `fold`) where all branches `return`. +Each parameter of the function can receive a type annotation with `param_name: type` and the return value of the function can also be annotated with `def fn_name(args) -> return_type:`. + +We can force the type-checker to run or not on a specific function by adding `checked` or `unchecked` between `def` and the function name. + ### Type Defines an algebraic data type. @@ -68,14 +79,16 @@ type Option: Some { value } None -type Tree: - Node { value, ~left, ~right } +type Tree(T): + Node { value: T, ~left: Tree(T), ~right: Tree(T) } Leaf ``` Type names must be unique, and should have at least one constructor. -Each constructor is defined by a name followed by its fields. +For a generic or polymorphic type, all type variables used in the constructors must be declared first in the type definition with `type Name(type_var1, ...):` + +Each constructor is defined by a name followed by its fields. The fields can be annotated with types that will be checked when creating values of that type. The `~` notation indicates a recursive field. To use `fold` statements with a type its recursive fields must be correctly marked with `~`. @@ -89,9 +102,9 @@ Read [defining data types](./defining-data-types.md) to know more. Defines a type with a single constructor (like a struct, a record or a class). ```python -object Pair { fst, snd } +object Pair(A, B) { fst: A, snd: B } -object Function { name, args, body } +object Function(T) { name: String, args, body: T } object Vec { len, data } ``` @@ -1256,6 +1269,11 @@ hvm link_ports: (a (b *)) & (c a) ~ (d e) & (e b) ~ (d c) + +# Casts a `u24` to itself. +# We can give type annotations to HVM definitions. +hvm u24_to_u24 -> (u24 -> u24): + ($([u24] ret) ret) ``` It's also possible to define functions using HVM syntax. This can be @@ -1338,3 +1356,248 @@ Multi-line commenting should also be used to document code. def second(x, y): return y ``` + +
+ +# Imp Type Syntax + +## Variable + +Any name represents a type variable. + +Used in generic or polymorphic type definitions. + +```python +# T is a type variable +type Option(T): + Some { value: T } + None + +# A is a type variable +def id(x: A) -> A: + return x +``` + +## Constructor + +`Ctr(...)` represents a constructor type. + +Used for defining custom data types or algebraic data types. +Can contain other types as parameters. + +```python +def head(list: List(T)) -> Option(T) + match list: + case List/Nil: + return Option/None + case List/Cons: + return Option/Some(list.head) +``` + +## Any + +`Any` represents the untyped type. + +It accepts values of alls type and will forcefully cast any type to `Any`. + +Can be used for values that can't be statically typed, either because +they are unknown (like in raw IO calls), because they contain untypable +expressions (like unscoped variables), or because the expression cannot +be typed with the current type system (like the self application `lambda x: x(x)`). + +```python +def main -> Any: + return 24 +``` + +## None + +`None` represents the eraser `*` or absence of a value. + +Often used to indicate that a function doesn't return anything. + +```python +def none -> None: + return * +``` + +## Hole + +`_` represents a hole type. + +This will let the type checker infer the most general type for an argument or return value. + +```python +def increment(x: _) -> _: + return x + 1 +``` + +## u24 + +`u24` represents an unsigned 24-bit integer. + +```python +def zero -> u24: + return 0 +``` + +## i24 + +`i24` represents a signed 24-bit integer. + +```python +def random_integer -> i24: + return -42 +``` + +## f24 + +`f24` represents a 24-bit floating-point number. + +```python +def PI -> f24: + return 3.14 +``` + +## Tuple + +`(_, _, ...)` represents a tuple type. + +Can contain two or more types separated by commas. + +```python +def make_tuple(fst: A, snd: B) -> (A, B): + return (fst, snd) +``` + +## Function + +`a -> b` represents a function type. + +`a` is the input type, and `b` is the output type. + +```python +def apply(f: A -> B, arg: A) -> B: + return f(arg) +``` + + + +# Fun Type Syntax + +## Variable + +Any name represents a type variable. + +Used in generic or polymorphic type definitions. + +```python +# T is a type variable +type (Option T) + = (Some T) + | None + +# A is a type variable +id : A -> A +id x = x +``` + +## Constructor + +`(Ctr ...)` represents a constructor type. + +Used for defining custom data types or algebraic data types. +Can contain other types as parameters. + +```python +head : (List T) -> (Option T) +head [] = Option/None +head (List/Cons head _) = (Option/Some head) +``` + +## Any + +`Any` represents the untyped type. + +It accepts values of alls type and will forcefully cast any type to `Any`. + +Can be used for values that can't be statically typed, either because +they are unknown (like in raw IO calls), because they contain untypable +expressions (like unscoped variables), or because the expression cannot +be typed with the current type system (like the self application `λx (x x)`). + +```python +main : Any +main = @x x +``` + +## None + +`None` represents the eraser `*` or absence of a value. + +Often used to indicate that a function doesn't return anything. + +```python +none : None +none = * +``` + +## Hole + +`_` represents a hole type. + +This will let the type checker infer the most general type for an argument or return value. + +```python +increment : _ -> _ +increment x = (+ x 1) +``` + +## u24 + +`u24` represents an unsigned 24-bit integer. + +```python +zero : u24 +zero = 0 +``` + +## i24 + +`i24` represents a signed 24-bit integer. + +```python +random_integer : i24 +random_integer = -24 +``` + +## f24 + +`f24` represents a 24-bit floating-point number. + +```python +PI : f24 +PI = 3.14 +``` + +## Tuple + +`(_, _, ...)` represents a tuple type. + +Can contain two or more types separated by commas. + +```python +make_tuple : A -> B -> (A, B) +make_tuple fst snd = (fst, snd) +``` + +## Function + +`a -> b` represents a function type. + +`a` is the input type, and `b` is the output type. + +```python +apply : (A -> B) -> A -> B +apply f arg = (f arg) +``` diff --git a/docs/type-checking.md b/docs/type-checking.md new file mode 100644 index 000000000..37ed41e3a --- /dev/null +++ b/docs/type-checking.md @@ -0,0 +1,138 @@ +# Type Checking + +Bend has a type checker with optional typing support based on a Hindley Milner type system. + +Programs can be optionally typed using the respective imp or fun type syntax. Type checking is +enabled by default, but can be toggled with the `-Otype-check` and `-Ono-type-check` options. + +Every function can be annotated with a type for its arguments and return value. +The type checker will infer the type of the function and then compare if it's compatible with the annotated type. + +```python +def add(x: u24, y: u24) -> u24: + return x + y + +# Arguments or return value without annotation are considered `Any`. +# They will be accepted by any function, regardless of being correct or not. +def push(list: List(T), value) -> List(T): + match list: + case List/Nil: + return List/Cons(value, List/Nil) + case List/Cons: + return List/Cons(list.head, push(list.tail, value)) + +# Error, List(T) must only receive values of type `T`. +def append_num(list: List(T), num: u24) -> List(T): + return List/Cons(num, list) + +# Error, Tree(T) can only store one type of value. +def my_tree() -> _: + return ![!1, !"a"] + +# Error, can't add a `u24` and a `f24`. +# Bend doesn't have implicit type conversions. +def add_float(x: u24, y: f24) -> f24: + return x + y +``` + +Bend comes with the following builtin types: + +* `u24`: Unsigned 24-bit integer. +* `i24`: Signed 24-bit integer. +* `f24`: Floating point number. +* `(T1, ..., Tn)`: Tuple with `n` elements of types `T1` to `Tn`. +* `Any`: Untyped value. +* `None`: Eraser `*`. +* `_`: A type that will be inferred by the type checker. + +The prelude library also defines some basic types that are used in Bend programs: + +* `String`: Text represented as a sequence of Unicode characters. +* `List(T)`: A list of values of type `T`. +* `Tree(T)`: A binary tree with values of type `T` at the leaves. +* `Map(T)`: A map from keys of type `u24` to values of type `T`. +* `IO(T)`: A monadic IO type that can be used to perform IO operations. +* `Result(O, E)`: Represents the result of an operation that can either succeed with an `O` or fail with an `E`. + + +Additionally, you can define your own algebraic data types. +In this case, all the type variables that occur in the constructors must be previously defined. + +```python +type Option(T): + Some { value: T } + None +``` + +All the constructors will be declared with the same type `TypeName(var2, var2, ...)`. + +### Enabling and disabling type checking + +In some cases we know that dynamically our program will not do something wrong despite not being able to give it the proper type. + +We can disable type checking for a specific function by either removing the type annotations or by giving it the `unchecked` keyword: + +```python +# Error, type-checked functions can't contain an unscoped variable. +def channel(x: u24) -> (u24 -> u24, u24): + return (lambda $a: x, $a) + +# We can remove the annotations. It won't be type-checked, +# but its type will be `Any -> Any`. +def channel(x): + return (lambda $a: x, $a) + +# Instead, we can use the `unchecked` keyword. +# The annotated type will be considered the truth, regardless of being correct or not. +def unchecked channel(x: u24) -> (u24 -> u24, u24): + return (lambda $a: x, $a) +``` + +The opposite is also possible, we can enable type checking for an unannotated function by using the `checked` keyword before the name of the function in its declaration: + +```python +# Despite the inferred type being `List(T) -> List(T)`, the type checker will consider it as `Any -> Any` because it's not annotated. +def checked tail(list): + match list: + case List/Nil: + return List/Nil + case List/Cons: + return list.tail + +# Error, can't infer the type of this function, despite having type `Any`. +# Not typeable by a Hindley-Milner type system. +checked (scott_concat a b) = (a + λh λt λcons λnil (cons h (scott_concat t b)) + b +) +``` + +We can also disable type checking for the entire program by using the `-Ono-type-check` option. + +Native HVM definitions are always unchecked. + +```python +# This function will be given the type `a -> a`. +hvm native_id -> (a -> a): + (x x) +``` + +### Limitations + +Currently, the following are not supported by the type checker: + +- Superpositions (`{a, b}`, the tuple type with duplication semantics, see [Dups and sups](https://github.com/HigherOrderCO/Bend/blob/main/docs/dups-and-sups.md)). +- Unscoped variables and variable binds (`$a`, `let $a = ...`, see [Scopeless lambdas](https://github.com/HigherOrderCO/Bend/blob/main/docs/using-scopeless-lambdas.md)). +- Expressions not typeable by a Hindley-Milner type system (e.g. self application `λx: x(x)`). + +Additionally, the builtin types `Number` and `Integer` can't be used directly in type annotations. They are used internally by the type checker to handle numeric expressions. + +```python +# The inferred type will be `Number(a) -> Number(a) -> Number(a)`. +def add(x: _, y: _) -> _: + return x + y + +# The inferred type will be `Integer(a) -> Integer(a) -> Integer(a)`. +def shift(x: _, n: _) -> _: + return x << n +``` diff --git a/examples/bubble_sort.bend b/examples/bubble_sort.bend index 4a0cf6f36..d3c05ade2 100644 --- a/examples/bubble_sort.bend +++ b/examples/bubble_sort.bend @@ -1,28 +1,26 @@ # Sorts a list -def sort(xs): +def sort(xs: List(u24)) -> List(u24): match xs: case List/Nil: return List/Nil case List/Cons: return insert(xs.head, sort(xs.tail)) -# Insert : U60 -> List -> List -def insert(v, xs): +def insert(v: u24, xs: List(u24)) -> List(u24): match xs: case List/Nil: return List/Cons(v, List/Nil) case List/Cons: return swap_gt(v, xs.head, xs.tail) -# SwapGT : U60 -> U60 -> U60 -> List -> List -def swap_gt(v, x, xs): +def swap_gt(v: u24, x: u24, xs: List(u24)) -> List(u24): if x > v: return List/Cons(v, List/Cons(x, xs)) else: return List/Cons(x, insert(v, xs)) # Generates a list of 'n' random u24 numbers using xorshift -def rnd(n): +def rnd(n: u24) -> List(u24): bend n, state=1: when n != 0: state = state ^ (state << 13) @@ -33,13 +31,13 @@ def rnd(n): return List/Nil # Sums a list of u24 numbers -def sum(xs): +def sum(xs: List(u24)) -> u24: fold xs: case List/Nil: return 0 case List/Cons: return xs.head + xs.tail -def main: +def main() -> u24: n = 100 return sum(sort(rnd(n))) diff --git a/examples/callcc.bend b/examples/callcc.bend index 57c484316..c21026062 100644 --- a/examples/callcc.bend +++ b/examples/callcc.bend @@ -1,16 +1,18 @@ # This program is an example that shows how scopeless lambdas can be used. -Seq a b = a +Seq (a: A) (b: B) : A = a # Create a program capable of using `callcc` -CC.lang = λprogram +unchecked CC.lang : ((((a -> b) -> b) -> c) -> c) -> d = +λprogram let callcc = λcallback (λ$garbage($hole) (callback λ$hole(0))); let result = (program callcc); let garbage = $garbage; # Discard `$garbage`, which is the value returned by `callback` (Seq result garbage) -main = (CC.lang λcallcc +main: u24 = (CC.lang λcallcc # This code calls `callcc`, then calls `k` to fill the hole with `42`. # This means that the call to callcc returns `42`, and the program returns `52` (+ 10 (callcc λk(+ (k 42) 1729))) ) + \ No newline at end of file diff --git a/examples/example_fun.bend b/examples/example_fun.bend index 3357091a0..8d701be0c 100644 --- a/examples/example_fun.bend +++ b/examples/example_fun.bend @@ -1,23 +1,38 @@ # Example of some things you can do with the 'fun' syntax -# Write definitions like this +# Define functions like this: +# By default they accept and return any type (Def1) = ((λa a) (λb b)) -# You can call a definition by just referencing its name -# It will be substituted in place of the reference +# You can call a definition by just referencing its name. +# It will be substituted in place of the reference. (Def2) = ((λa a) Def1) -# Definitions and variables can have names in upper and lower case and contain numbers -# Names defined in a more inner position shadow names in an outer position +# Definitions and variables can have names in upper and lower case and +# contain numbers, '.', '-', '_' and '/'. +# Names defined in a more inner position shadow names in an outer position. (def3) = ((λDef1 Def1) (λx λx x)) -# The language is affine, but if you use a variable more than once the compiler inserts duplications for you -# Of course you can always do them manually -(def4) = λz let {z1 z2} = z; (z1 ((λx (x x x x x)) z2)) +# You can annotate a function with the expected types and Bend will check them. +# The parentheses on the left side of the definition are optional. +const (a: A) (b: B) : A = a -# You can use machine numbers and some native numeric operations -# Numeric operations can only reduce numbers, doing (+ (λx x) 1) will not do anything -(nums) = λx1 λx2 (* (+ x1 1) (/ (- x2 2) 1)) +# There are three types of native numbers available: u24, i24 and f24. +# u24: Unsigned 24-bit integers +# i24: Signed 24-bit integers +# f24: Floating point numbers with 24-bit precision +(unsigneds (x1: u24) (x2: u24)) : u24 = (* (+ x1 1) (/ (- x2 2) 1)) + +# '+' or '-' are mandatory for i24. +(signeds (x1: i24) (x2: i24)) : i24 = (* (+ x1 +1) (/ (- x2 -2) +1)) + +# The '.' is mandatory for f24. +(floats (x1: f24) (x2: f24)) : f24 = (* (+ x1 1.0) (/ (- x2 -2.0) +1.0)) + +# Numeric operations are only meaningful on native numbers. +# You can force the compiler to do it anyway by using untyped values. +id = λx x # 'id' wasn't given a type so it's inferred as 'Any'. +bad_nums : Any = (+ id 1) # You can use numbers on the native match expression # The `+` arm binds the `scrutinee`-1 variable to the value of the number -1 @@ -34,7 +49,7 @@ type Bool = True | False # You can have pattern matching on definitions # Use `*` to ignore a pattern (Option.unwrap_or (Option/Some val) *) = val -(Option.unwrap_or Option/None or) = or +(Option.unwrap_or Option/None or) = or (Bool.or Bool/True *) = Bool/True (Bool.or * Bool/True) = Bool/True @@ -45,17 +60,16 @@ type Bool = True | False match bool { Bool/True: Bool/False Bool/False: Bool/True - } + } # Data types can store values -type Boxed = (Box val) +type Boxed T = (Box (val: T)) -# Types with only one constructor can be destructured using `let` or a single matching definition +# Types with only one constructor can be destructured using `open`, +# a single matching definition or a 'match'. (Box.map (Boxed/Box val) f) = (Boxed/Box (f val)) -(Box.unbox) = λbox match box { - Boxed/Box: box.val -} +(Box.unbox (box: (Boxed T))): T = open Boxed box; box.val # Use tuples to store two values together without needing to create a new data type (Tuple.new fst snd) = @@ -69,6 +83,68 @@ type Boxed = (Box val) let (fst, snd) = pair snd +# You can give type annotations to type definitions as well. +# The recursive fields should be annotated with a '~'. +type ListList T = + (Cons (head: (List T)) ~(tail: (ListList T))) | + Nil + +# Bend functions usually center around recursion +sum (List/Nil) = 0 +sum (List/Cons x xs) = (+ x (sum xs)) + +sum_list (ListList/Nil) = List/Nil +sum_list (ListList/Cons x xs) = (List/Cons (sum x) (sum_list xs)) + +# To create local recursive functions that consume a recursive type, you can use 'fold'. +sum_list2 ll = fold ll { + # The fold is implicitly called for fields marked with '~' in their definition. + # In this case, 'll.tail' is replaced with a recursive call to the fold. + ListList/Cons: (List/Cons (sum ll.head) ll.tail) + ListList/Nil: List/Nil +} + +# To do the opposite and create a recursive structure, you can use 'bend'. +# 'bend' can be seen as a tree-like generalization of a while loop. +new_list = bend x = 0 { + when (< x 10): + # 'fork' calls the bend recursively with the provided values. + (List/Cons x (fork (+ x 1))) + else: + List/Nil +} + +# To make programs that are more parallelizable, you generally want to +# avoid lists and use tree-like structures with multiple recursion instead. +sum_nums from to = + if (< from to) { + (+ + (sum_nums from (/ 2 (+ from to))) + (sum_nums (+ 1 (/ 2 (+ from to))) to)) + } else { + from + } + +# Internally, each variable is only used once. If you use a variable +# is used more than once, the compiler will insert duplications for you. +# +# You can also write them manually with 'let {a b} = ...', but then +# your function needs to be unchecked, either by not annotating it +# with types or by marking it as unchecked. +unchecked (def4) = λz let {z1 z2} = z; (z1 ((λx (x x x x x)) z2)) + +# Duplicating a term that duplicates a variable is not allowed and will break the program. +map f (List/Cons x xs) = (List/Cons (f x) (map f xs)) # 'f' is duplicated here +map f (List/Nil) = List/Nil + +# 'map' duplicated the lambda which duplicates 'a'. +# Although this is well defined, it does not produce a sound lambda-calculus result. +VeryBad (a: u24) (xs: (List u24)) : (List u24) = +(map + λ x (+ (* a x) a) # 'a' is duplicated here + [1, 2, 3, 4, 5]) + + # All files must have a main definition to run. # You can execute a program in Bend with "bend run