diff --git a/doc/book.html b/doc/book.html index ff55e0e16..65e44b42a 100644 --- a/doc/book.html +++ b/doc/book.html @@ -11,7 +11,7 @@
2024-02-14: Koka v3.1.0 released with a concurrent build system and improved vscode +language service over stdio. Revised named effect handler typing to match the +formal system more closely. +
2024-01-25: Koka v3.0.4 released with improved VS Code hover and inlay information.
Splits std/core
in multiple modules, fixes bug in infinite expansion for implicits
and various other small improvements.
@@ -1745,7 +1750,7 @@
void map( list_t xs, function_t f,
- list_t* res)
+void map( list_t xs, function_t f,
+ list_t* res)
{
while (is_Cons(xs)) {
if (is_unique(xs)) { // if xs is not shared..
- box_t y = apply(dup(f),xs->head);
+ box_t y = apply(dup(f),xs->head);
if (yielding()) { ... } // if f yields to a general ctl operation..
else {
- xs->head = y;
+ xs->head = y;
*res = xs; // update previous node in-place
res = &xs->tail; // set the result address for the next node
xs = xs->tail; // .. and continue with the next node
@@ -2028,42 +2033,42 @@ else { ... } // slow path allocates fresh nodes
}
- *res = Nil;
+ *res = Nil;
}
Moreover, the Koka compiler also implements tail-recursion modulo cons (TRMC) [11, 12]
-and instead of using a recursive call, the function is eventually optimized
-into an in-place updating loop for the fast path, similar to
-the C code example on the right.
+and instead of using a recursive call, the function is eventually optimized
+into an in-place updating loop for the fast path, similar to
+the C code example on the right.
Importantly, the reuse optimization is guaranteed
and a programmer can see when the optimization applies.
This leads to a new programming technique we call FBIP:
functional but in-place. Just like tail-recursion allows us
-to express loops with regular function calls, reuse analysis
+to express loops with regular function calls, reuse analysis
allows us to express many imperative algorithms in a purely
-functional style.
+functional style.
2.6. Specialization
As another example of the effectiveness of Perceus and the strong semantics
of the Koka core language, we can
-look at the red-black tree example and look at the code generated
+look at the red-black tree example and look at the code generated
when folding a binary tree. The red-black tree is defined as:
-
type color
+type color
Red
Black
-type tree<k,a>
+type tree<k,a>
Leaf
Node(color : color, left : tree<k,a>, key : k, value : a, right : tree<k,a>)
-type colorwhy/color: V
+type colorwhy/color: V
Redwhy/Red: color
Blackwhy/Black: color
-type treewhy/tree: (V, V) -> V<kk: V,aa: V>
+type treewhy/tree: (V, V) -> V<kk: V,aa: V>
Leafwhy/Leaf: forall<a,b> tree<a,b>
Nodewhy/Node: forall<a,b> (color : color, left : tree<a,b>, key : a, value : b, right : tree<a,b>) -> tree<a,b>(color : colorwhy/color: V, left : treewhy/tree: (V, V) -> V<kk: V,aa: V>, key : kk: V, value : aa: V, right : treewhy/tree: (V, V) -> V<kk: V,aa: V>)
@@ -2083,7 +2088,7 @@ This is used in the example to count all the Truestd/core/types/True: bool
values in
+
This is used in the example to count all the Truestd/core/types/True: bool
values in
a tree t : treewhy/tree: (V, V) -> V<k,boolstd/core/types/bool: V>
as:
val count = t.fold(0, fn(k,v,acc) if v then acc+1 else acc)
val count = t.fold(0, fn(k,v,acc) if v then acc+1 else acc)
@@ -2091,30 +2096,30 @@ This may look quite expensive where we pass a polymorphic
+
This may look quite expensive where we pass a polymorphic
first-class function that uses arbitrary precision integer arithmetic.
-However, the Koka compiler first specializes the fold
definition
+However, the Koka compiler first specializes the fold
definition
to the passed function, then simplifies the resulting monomorphic code,
and finally applies Perceus to insert reference count instructions.
This results in the following internal core code:
fun spec-fold(t : tree<k,bool>, acc : int) : int
match t
- Node(_,l,k,v,r) ->
+ Node(_,l,k,v,r) ->
if unique(t) then { drop(k); free(t) } else { dup(l); dup(r) } // perceus inserted
val x = if v then 1 else 0
- spec-fold(r, spec-fold(l,acc) + x)
- Leaf ->
+ spec-fold(r, spec-fold(l,acc) + x)
+ Leaf ->
drop(t)
acc
val count = spec-fold(t,0)
fun spec-fold(t : treewhy/tree: (V, V) -> V<k,boolstd/core/types/bool: V>, acc : intstd/core/types/int: V) : intstd/core/types/int: V
match t
- Nodewhy/Node: forall<a,b> (color : color, left : tree<a,b>, key : a, value : b, right : tree<a,b>) -> tree<a,b>(_,l,k,v,r) ->
+ Nodewhy/Node: forall<a,b> (color : color, left : tree<a,b>, key : a, value : b, right : tree<a,b>) -> tree<a,b>(_,l,k,v,r) ->
if uniquestd/core/unique: () -> ndet int(t) then { drop(k); free(t) } else { dup(l); dup(r) } // perceus inserted
val x = if v then 1 else 0
- spec-fold(r, spec-fold(l,acc) + x)
- Leafwhy/Leaf: forall<a,b> tree<a,b> ->
+ spec-fold(r, spec-fold(l,acc) + x)
+ Leafwhy/Leaf: forall<a,b> tree<a,b> ->
drop(t)
acc
@@ -2127,8 +2132,8 @@
spec_fold:
- ...
- LOOP0:
+ ...
+ LOOP0:
mov x21, x0 ; x20 is t, x21 = acc (x19 = koka context _ctx)
LOOP1: ; the "match(t)" point
cmp x20, #9 ; is t a Leaf?
@@ -2140,12 +2145,12 @@ ldr
w8, [x23, #4] ; w8 = reference count (0 is unique)
cbnz w8, LBB15_11 ; if t is not unique, goto cold path to dup the members
tbz w0, #0, LBB15_13 ; if k is allocated (bit 0 is 0), goto cold path to free it
- LBB15_7:
+ LBB15_7:
mov x0, x23 ; call free(t)
bl _mi_free
- LBB15_8:
+ LBB15_8:
mov x0, x22 ; call spec_fold(l,acc,_ctx)
- mov x1, x21
+ mov x1, x21
mov x2, x19
bl spec_fold
cmp x24, #1 ; boxed value is False?
@@ -2157,30 +2162,30 @@ mov
w1, #5 ; otherwise, use generic bigint addition
mov x2, x19
bl _kk_integer_add_generic
- b LOOP0
+ b LOOP0
...
The polymorphic fold
with its higher order parameter
-is eventually compiled into a tight loop with close to optimal
-assembly instructions.
+is eventually compiled into a tight loop with close to optimal
+assembly instructions.
advancedHere we see too that the node t
is freed explicitly as soon as it is
-no longer live. This is usually earlier than scope-based deallocation
-(like RAII) and therefore Perceus can guarantee to be garbage-free
+no longer live. This is usually earlier than scope-based deallocation
+(like RAII) and therefore Perceus can guarantee to be garbage-free
where in a (cycle-free) program objects are always immediatedly
deallocated as soon as they become unreachable [13–15, 22].
-Moreover, it is deterministic and behaves just like regular
+Moreover, it is deterministic and behaves just like regular
malloc/free calls.
Reference counting may still seem expensive compared to trace-based garbage collection
which only (re)visits all live objects and never needs to free objects
explicitly. However, Perceus usually frees an object right after its
last use (like in our example), and thus the memory is still in the
cache reducing the cost of freeing it. Also, Perceus never (re)visits
-live objects arbitrarily which may trash the caches especially if the
-live set is large. As such, we think the deterministic behavior of
+live objects arbitrarily which may trash the caches especially if the
+live set is large. As such, we think the deterministic behavior of
Perceus together with the garbage-free property may work out better
in practice.
As usual, we start with the familiar Hello world program:
fun main() @@ -2347,7 +2352,7 @@where the body desugars to
repeat( 10, { println(
hi
) } )
which +where the body desugars to
repeat( 10, { println(
hi
) } )
which desugars further torepeat( 10, fn(){ println(
hi
)} )
. The is especially convenient for thewhilestd/core/while: forall<e> (predicate : () -> <div|e> bool, action : () -> <div|e> ()) -> <div|e> ()
loop since this is not a built-in control flow construct but just a regular function: @@ -2376,7 +2381,7 @@To the best of our knowledge, Koka was the first language to have generalized trailing lambdas. It was also one of the first languages to have dot notation (This was independently developed but it turns out -the D language has a similar feature (called UFCS) which predates dot-notation). +the D language has a similar feature (called UFCS) which predates dot-notation). Another novel syntactical feature is the
with
statement. With the ease of passing a function block as a parameter, these often become nested. For example: @@ -2563,7 +2568,7 @@Intuitively, we can view the handler
with fun emit
as a (statically typed) dynamic binding +Intuitively, we can view the handler
@@ -2619,8 +2624,8 @@with fun emit
as a (statically typed) dynamic binding of the functionemit
over the rest of the scope.intstd/core/types/int: V type signified by the question mark:
? intstd/core/types/int: V
.3.1.7. A larger example: cracking Caesar encoding
-Here is a slightly larger program inspired by an -example in Graham Hutton's most +
Here is a slightly larger program inspired by an +example in Graham Hutton's most excellent "Programming in Haskell" book:
fun main() test-uncaesar() @@ -4665,7 +4670,7 @@
@@ -4833,16 +4838,16 @@fun is a reserved word, the word
functional
is considered a single identifier.4.1.1. Source code
Source code consists of a sequence of unicode characters. Valid characters in -actual program code consist strictly of ASCII characters which range from 0 to 127. +actual program code consist strictly of ASCII characters which range from 0 to 127. Only comments, string literals, and character literals are allowed to contain extended unicode characters. The grammar is designed such that a lexical analyzer and parser can directly work on UTF-8 encoded source files without @@ -4737,14 +4742,14 @@
x concat1 visit-left is-nilstd/core/types/is-nil: forall<a> (list : list<a>) -> bool x' Consstd/core/types/Cons: forall<a> (head : a, tail : list<a>) -> list<a> -Truestd/core/types/True: bool +Truestd/core/types/True: bool
surrogate
::= xD800..xDFFF
bidi ::= x200E
|x200F
|x202A..x202E
|x2066..x2069
(bi-directional text control) Actual program code consists only of 7-bit ASCII characters while only comments -and literals can contain extended unicode characters. As such, +and literals can contain extended unicode characters. As such, a lexical analyzer can directly process UTF-8 encoded input as -a sequence of bytes without needing UTF-8 decoding or unicode character +a sequence of bytes without needing UTF-8 decoding or unicode character classification1. +…" class="footnote-ref localref" >1. For security [2], some character ranges are excluded: the C0 and C1 +Boucher and Anderson, 2021" class="bibref localref" style="target-element:bibitem">2]
, some character ranges are excluded: the C0 and C1 control codes (except for space, tab, carriage return, and line feeds), surrogate characters, and bi-directional text control characters. @@ -4854,10 +4859,10 @@
Any block that is indented is automatically wrapped with curly braces: -
fun show-messages1( msgs : list<string> ) : console ()
+fun show-messages1( msgs : list<string> ) : console ()
msgs.foreach fn(msg)
println(msg)
-fun show-messages1spec/show-messages1: (msgs : list<string>) -> console ()( msgsmsgs: list<string> : liststd/core/types/list: V -> V<stringstd/core/types/string: V> )result: -> console () : consolestd/core/console/console: X (std/core/types/unit: V)std/core/types/unit: V
+fun show-messages1spec/show-messages1: (msgs : list<string>) -> console ()( msgsmsgs: list<string> : liststd/core/types/list: V -> V<stringstd/core/types/string: V> )result: -> console () : consolestd/core/console/console: X (std/core/types/unit: V)std/core/types/unit: V
msgs.foreach fn(msg)
println(msg)
@@ -4880,12 +4885,12 @@
Any statements and declarations that are aligned in a block are terminated with semicolons, that is:
-
fun show-messages2( msgs : list<string> ) : console ()
+fun show-messages2( msgs : list<string> ) : console ()
msgs.foreach fn(msg)
println(msg)
println("--")
println("done")
-fun show-messages2spec/show-messages2: (msgs : list<string>) -> console ()( msgsmsgs: list<string> : liststd/core/types/list: V -> V<stringstd/core/types/string: V> )result: -> console () : consolestd/core/console/console: X (std/core/types/unit: V)std/core/types/unit: V
+fun show-messages2spec/show-messages2: (msgs : list<string>) -> console ()( msgsmsgs: list<string> : liststd/core/types/list: V -> V<stringstd/core/types/string: V> )result: -> console () : consolestd/core/console/console: X (std/core/types/unit: V)std/core/types/unit: V
msgs.foreach fn(msg)
println(msg)
println("--")
@@ -4916,30 +4921,30 @@ fun eq2( x : int,
- y : int ) : io bool
+fun eq2( x : int,
+ y : int ) : io bool
print("calc " ++
"equ" ++
"ality")
val result = if (x == y)
then True
else False
- result
-fun eq2spec/eq2: (x : int, y : int) -> io bool( xx: int : intstd/core/types/int: V,
- y :std/core/types/int: V intstd/core/types/int: V ) :std/core/io: E iostd/core/io: E boolstd/core/types/bool: V
+ result
+fun eq2spec/eq2: (x : int, y : int) -> io bool( xx: int : intstd/core/types/int: V,
+ y :std/core/types/int: V intstd/core/types/int: V ) :std/core/io: E iostd/core/io: E boolstd/core/types/bool: V
print("calc " ++
"equ" ++
"ality")
val result = if x: int(xstd/core/int/(==): (x : int, y : int) -> <console,alloc<global>,div,exn,fsys,ndet,net,read<global>,ui,write<global>> bool == y)
then Truestd/core/types/True: bool
else Falsestd/core/types/False: bool
- result
+ result
is elaborated to:
-
fun eq2( x : int,
- y : int ) : io bool {
+fun eq2( x : int,
+ y : int ) : io bool {
print("calc " ++
"equ" ++
"ality");
@@ -4948,8 +4953,8 @@ fun eq2spec/eq2: (x : int, y : int) -> io bool( x : intstd/core/types/int: V,
- y : intstd/core/types/int: V ) : iostd/core/io: E boolstd/core/types/bool: V {
+
fun eq2spec/eq2: (x : int, y : int) -> io bool( x : intstd/core/types/int: V,
+ y : intstd/core/types/int: V ) : iostd/core/io: E boolstd/core/types/bool: V {
print("calc " ++
"equ" ++
"ality");
@@ -4966,13 +4971,13 @@ if expression no braces or semicolons are inserted
as the indented lines start with then
and else
respectively.
In the parameter declaration, the ,
signifies the continuation.
-More precisely, for long expressions and declarations, indented or aligned lines
+More precisely, for long expressions and declarations, indented or aligned lines
do not get braced or semicolons if:
- The line starts with a clear expression or declaration start continuation token,
-namely: an operator (including
.
), then
, else
, elif
,
-a closing brace ()
, >
, ]
, or }
),
+namely: an operator (including .
), then
, else
, elif
,
+a closing brace ()
, >
, ]
, or }
),
or one of ,
, ->
, {
, =
, |
, ::
, .
, :=
.
- The previous line ends with a clear expression or declaration end continuation token,
@@ -4981,25 +4986,25 @@
The layout algorithm is performed on the token stream in-between lexing
-and parsing, and is independent of both. In particular, there are no intricate
-dependencies with the parser (which leads to very complex layout rules, as is the
+and parsing, and is independent of both. In particular, there are no intricate
+dependencies with the parser (which leads to very complex layout rules, as is the
case in languages like Haskell or JavaScript).
-
Moreover, in contrast to purely token-based layout rules (as in Scala or Go for example),
-the visual indentation in a Koka program corresponds directly to how the compiler
+
Moreover, in contrast to purely token-based layout rules (as in Scala or Go for example),
+the visual indentation in a Koka program corresponds directly to how the compiler
interprets the statements. Many tricky layout
examples in other programming languages are often based on a mismatch between
the visual representation and how a compiler interprets the tokens – with
-Koka's layout rule such issues are largely avoided.
+Koka's layout rule such issues are largely avoided.
-Of course, it is still allowed to explicitly use semicolons and braces,
+
Of course, it is still allowed to explicitly use semicolons and braces,
which can be used for example to put multiple statements on a single line:
fun equal-line( x : int, y : int ) : io bool {
print("calculate equality"); (x == y)
-}
+}
fun equal-linespec/equal-line: (x : int, y : int) -> io bool( xx: int : intstd/core/types/int: V, yy: int : intstd/core/types/int: V )result: -> io bool : iostd/core/io: E boolstd/core/types/bool: V {
printstd/core/console/string/print: (s : string) -> <console,alloc<global>,div,exn,fsys,ndet,net,read<global>,ui,write<global>> ()("calculate equality"literal: string
count= 18); (xx: int ==std/core/int/(==): (x : int, y : int) -> <console,alloc<global>,div,exn,fsys,ndet,net,read<global>,ui,write<global>> bool yy: int)
-}
+}
@@ -5009,18 +5014,18 @@ fun equal( x : int, y : int ) : io bool {
+fun equal( x : int, y : int ) : io bool {
print("calculate equality")
result = if (x == y) then True // wrong: too little indentation
/* wrong */ else False
result
-}
-fun equal( x : intstd/core/types/int: V, y : intstd/core/types/int: V ) : iostd/core/io: E boolstd/core/types/bool: V {
+}
+fun equal( x : intstd/core/types/int: V, y : intstd/core/types/int: V ) : iostd/core/io: E boolstd/core/types/bool: V {
print("calculate equality")
result = if (x == y) then Truestd/core/types/True: bool // wrong: too little indentation
/* wrong */ else Falsestd/core/types/False: bool
result
-}
+}
@@ -5072,15 +5077,15 @@
Semicolon insertion: For each non-blank line, the
-indentation must be equal or larger to the layout indentation.
+indentation must be equal or larger to the layout indentation.
If the indentation is equal to the layout indentation, and the first
lexeme on the line is not an expression continuation, a semicolon
-is inserted before the lexeme.
+is inserted before the lexeme.
Also, a semicolon is always inserted before a closing brace }
and
before the end of the token sequence.
As defined, braces are inserted around any indented blocks, semicolons +
As defined, braces are inserted around any indented blocks, semicolons are inserted whenever statements or declarations are aligned (unless the lexeme happens to be a clear expression continuation). To simplify the grammar specification, a semicolon is also always inserted before @@ -5452,16 +5457,16 @@