forked from orangeduck/BuildYourOwnLisp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
chapter12_functions.html
1259 lines (947 loc) · 50.4 KB
/
chapter12_functions.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<h1>Functions <small>• Chapter 12</small></h1>
<h2>What is a Function?</h2> <hr/>
<p>Functions are the essence of all programming. Back in the early days of computer science they represented a naive dream. The idea was that we could reduce computation into these smaller and smaller bits of re-usable code. Given enough time, and a proper structure for libraries, eventually we would have written code required for all computational needs. No longer would people have to write their own functions, and programming would consist of an easy job of stitching together components.</p>
<p>This dream hasn't come true yet, but it persists, no matter how flawed. Each new programming technique, or paradigm, that comes along shakes up this idea a little. They promise better re-use of code. Better abstractions, and an easier life for all.</p>
<div class='pull-right alert alert-warning' style="margin: 15px; text-align: center;">
<img src="/static/img/bananas.png" alt="bananas"/>
<p><small>Bananaphone • Another naive dream.</small></p>
</div>
<p>In reality what each paradigm delivers is simply <em>different</em> abstractions. There has always been a trade-off. For each higher level of thinking about programming, some piece is thrown away. And this means, no matter how well you decide what to keep and what to leave, occasionally someone will need that piece that has been lost. But through all of this, one way or the other, functions have always persisted, and continually proven to be effective.</p>
<p>We've used functions in C, we know what they <em>look like</em>, but we don't know exactly what they <em>are</em>. Here are a few ways to think about them.</p>
<p>One way to think about functions is as description of some computation you want to be performed later. When you define a function it is like saying "when I use <em>this</em> name I want <em>that</em> sort of thing to happen". This is a very practical idea of a function. It is very intuitive, and metaphorical to language. This is the way you would command a human, or animal. Another thing I like about this is that it captures the delayed nature of functions. Functions are defined once, but can be called on repeatedly after.</p>
<p>Another way to think about functions is as a black box that takes some input and produces some output. This idea is subtly different from the former. It is more algebraic, and doesn't talk about <em>computation</em> or <em>commands</em>. This idea is a mathematical concept, and is not tied to some particular machine, or language. In some situations this idea is exceptionally useful. It allows us to think about functions without worrying about their internals, or how they are computed exactly. We can then combine and compose them together without worry of something subtle going wrong. This is the core idea behind an abstraction, and is what allows layers of complexity to work together with each other rather than conflict. This idea's strength can also be its downfall. Because it does not mention anything about computation it does not deal with a number of real world concerns. <em>"How long will this function take to run?"</em>, <em>"Is this function efficient?"</em>, <em>"Will it modify the state of my program? If so how?"</em>.</p>
<div class='pull-left alert alert-warning' style="margin: 15px; text-align: center;">
<img src="/static/img/black_box.png" alt="black_box"/>
<p><small>Black Box • Your typical function.</small></p>
</div>
<p>A third method is to think of functions as <em>partial computations</em>. Like the Mathematical model they can take some inputs. These values are required before it can complete the computation. This is why it is called <em>partial</em>. But like the computational model, the body of the function consists of a computation specified in some language of commands. These inputs are called <em>unbound variables</em>, and to finish the computation one simply supplies them. Like fitting a cog into a machine which was before spinning aimlessly, this completes all that is needed for the computation to run, and the machine runs. The output of these <em>partial computations</em> is itself a variable with an unknown value. This output can be placed as input to a new function, and so one function relies on another.</p>
<p>An advantage of this idea over the Mathematical Model is that we recognize that functions <em>contain computation</em>. We see that when the computation runs, some physical process is going on in the machine. This means we recognize the fact that certain things take time to elapse, or that a function might change the program state, or do anything else we're not sure about!</p>
<p>All these ideas are explored in the study of functions, <em>Lambda calculus</em>. This is a field that combines logic, maths, and computer science. The name comes from the Greek letter Lambda, which is used in the representation of <em>binding variables</em>. Using Lambda calculus gives a way of defining, composing and building <em>functions</em> using a simple mathematical notation.</p>
<p>We are going to use all of the previous ideas to add user defined functions to our language. Lisp is already well suited to this sort of playing around, and using these concepts it won't take much work for us to implement functions.</p>
<p>The first step will be to write a builtin function that can create user defined functions. Here is one idea as to how it can be specified. The first argument could be a list of symbols, just like our <code>def</code> function. These symbols we call the <em>formal arguments</em>, also known as the <em>unbound variables</em>. They act as the inputs to our <em>partial computation</em>. The second argument could be another list. When running the function this is going to be evaluated with our builtin <code>eval</code> function.</p>
<p>This function we'll call just <code>\</code>, (a homage to The Lambda Calculus as the <code>\</code> character looks a little bit like a lambda). To create a function which takes two inputs and adds them together, we would then write something like this.</p>
<pre><code data-language='lispy'>\ {x y} {+ x y}</code></pre>
<p>We can call the function by putting it as the first argument in a normal S-Expression</p>
<pre><code data-language='lispy'>(\ {x y} {+ x y}) 10 20</code></pre>
<p>If we want to name this function we can pass it to our existing builtin <code>def</code> like any other value and store it in the environment.</p>
<pre><code data-language='lispy'>def {add-together} (\ {x y} {+ x y})</code></pre>
<p>Then we can call it by refering to it by name.</p>
<pre><code data-language='lispy'>add-together 10 20</code></pre>
<h2>Function Type</h2> <hr/>
<p>To store a function as an <code>lval</code> we need to think exactly what it consists of.</p>
<p>Using the previous definition, a function should consists of three parts. First is the list of <em>formal arguments</em>, which we must bind before we can evaluate the function. The second part is a Q-Expression that represents the body of the function. Finally we require a location to store the values assigned to the <em>formal arguments</em>. Luckily we already have a structure for storing variables, an <em>environment</em>.</p>
<p>We will store our builtin functions and user defined functions under the same type <code>LVAL_FUN</code>. This means we need a way internally to differentiate between them. To do this we can check if the <code>lbuiltin</code> function pointer is <code>NULL</code> or not. If it is not <code>NULL</code> we know the <code>lval</code> is some builtin function, otherwise we know it is a user function.</p>
<pre><code data-language='c'>struct lval {
int type;
/* Basic */
long num;
char* err;
char* sym;
/* Function */
lbuiltin builtin;
lenv* env;
lval* formals;
lval* body;
/* Expression */
int count;
lval** cell;
};</code></pre>
<p>We've renamed the <code>lbuiltin</code> field from <code>func</code> to <code>builtin</code>. We should make sure to change this in all the places it is used in our code.</p>
<p>We also need to create a constructor for user defined <code>lval</code> functions. Here we build a new environment for the function, and assign the <code>formals</code> and <code>body</code> values to those passed in. </p>
<pre><code data-language='c'>lval* lval_lambda(lval* formals, lval* body) {
lval* v = malloc(sizeof(lval));
v->type = LVAL_FUN;
/* Set Builtin to Null */
v->builtin = NULL;
/* Built new environment */
v->env = lenv_new();
/* Set Formals and Body */
v->formals = formals;
v->body = body;
return v;
}</code></pre>
<p>As with whenever we change our <code>lval</code> type we need to update the functions for <em>deletion</em>, <em>copying</em>, and <em>printing</em> to deal with the changes. For evaluation we'll need to look in greater depth.</p>
<p>For <strong>Deletion</strong>...</p>
<pre><code data-language='c'>case LVAL_FUN:
if (v->builtin != NULL) {
lenv_del(v->env);
lval_del(v->formals);
lval_del(v->body);
}
break;</code></pre>
<p>For <strong>Copying</strong>...</p>
<pre><code data-language='c'>case LVAL_FUN:
x->builtin = v->builtin;
if (x->builtin != NULL) {
x->env = lenv_copy(v->env);
x->formals = lval_copy(v->formals);
x->body = lval_copy(v->body);
}
break;</code></pre>
<p>For <strong>Printing</strong>...</p>
<pre><code data-language='c'>case LVAL_FUN:
if (v->builtin) {
printf("<builtin>");
} else {
printf("(\\ "); lval_print(v->formals); putchar(' '); lval_print(v->body); putchar(')');
}
break;</code></pre>
<h2>Lambda Function</h2> <hr/>
<p>We can now add a builtin for our lambda function. We want it to take as input some list of symbols, and a list that represents the code. After that it should return a function <code>lval</code>. We've defined a few of builtins now, and this one will follow the same format. Like in <code>def</code> we do some error checking to ensure the argument types and count are correct (using some newly defined Macros). Then we just pop the first two arguments from the list and pass them to our previously defined function <code>lval_lambda</code>.</p>
<pre><code data-language='c'>lval* builtin_lambda(lenv* e, lval* a) {
/* Check Two arguments, each of which are Q-Expressions */
LASSERT_NUM("\\", a, 2);
LASSERT_TYPE("\\", a, 0, LVAL_QEXPR);
LASSERT_TYPE("\\", a, 1, LVAL_QEXPR);
/* Check first Q-Expression contains only Symbols */
for (int i = 0; i < a->cell[0]->count; i++) {
LASSERT(a, (a->cell[0]->cell[i]->type == LVAL_SYM),
"Cannot define non-symbol. Got %s, Expected %s.",
ltype_name(a->cell[0]->cell[i]->type), ltype_name(LVAL_SYM));
}
/* Pop first two arguments and pass them to lval_lambda */
lval* formals = lval_pop(a, 0);
lval* body = lval_pop(a, 0);
lval_del(a);
return lval_lambda(formals, body);
}</code></pre>
<div class='pull-right alert alert-warning' style="margin: 15px; text-align: center;">
<img src="/static/img/playgroup.png" alt="playgroup"/>
<p><small>Playgroup • Your typical parent environment.</small></p>
</div>
<h2>Parent Environment</h2> <hr/>
<p>We've given functions their own environment. In this environment we will place the values that their formal arguments are set to. When we come to evaluate the body of the function we can do it in this environment and know that those variables will have the correct values.</p>
<p>But ideally we also want these functions to be able to access variables which are in the global environment, such as our builtin functions.</p>
<p>We can solve this problem by changing the definition of our environment to contain a reference to some <em>parent</em> environment. Then, when we want to evaluate a function, we can set this <em>parent</em> environment to our global environment, which has all of our builtins defined within.</p>
<p>When we add this to our <code>lenv</code> struct, conceptually it will be a <em>reference</em> to a parent environment, not some sub-environment or anything like that. Because of this we shouldn't <em>delete</em> it when our <code>lenv</code> gets deleted, or copy it when our <code>lenv</code> gets copied.</p>
<p>The way the <em>parent environment</em> works is simple. If someone calls <code>lenv_get</code> on the environment, and the symbol cannot be found. It will look then in any parent environment to see if the named value exists there, and repeat the process till either the variable is found or there are no more parents. To signify that an environment has no parent we set the reference to <code>NULL</code>.</p>
<p>The constructor function only require basic changes to allow for this.</p>
<pre><code data-language='c'>struct lenv {
lenv* par;
int count;
char** syms;
lval** vals;
};
lenv* lenv_new(void) {
lenv* e = malloc(sizeof(lenv));
e->par = NULL;
e->count = 0;
e->syms = NULL;
e->vals = NULL;
return e;
}
</code></pre>
<p>To get a value from an environment we need to add in the search of the parent environment in the case that a symbol is not found.</p>
<pre><code data-language='c'>lval* lenv_get(lenv* e, lval* k) {
for (int i = 0; i < e->count; i++) {
if (strcmp(e->syms[i], k->sym) == 0) { return lval_copy(e->vals[i]); }
}
/* If no symbol check in parent otherwise error */
if (e->par) {
return lenv_get(e->par, k);
} else {
return lval_err("Unbound Symbol '%s'", k->sym);
}
}</code></pre>
<p>Because we have a new <code>lval</code> type that has its own environment we need a function for copying environments, to use for when we copy <code>lval</code> structs.</p>
<pre><code data-language='c'>lenv* lenv_copy(lenv* e) {
lenv* n = malloc(sizeof(lenv));
n->par = e->par;
n->count = e->count;
n->syms = malloc(sizeof(char*) * n->count);
n->vals = malloc(sizeof(lval*) * n->count);
for (int i = 0; i < e->count; i++) {
n->syms[i] = malloc(strlen(e->syms[i]) + 1);
strcpy(n->syms[i], e->syms[i]);
n->vals[i] = lval_copy(e->vals[i]);
}
return n;
}</code></pre>
<p>Having parent environments also changes our concept of <em>defining</em> a variable.</p>
<p>There are two ways we could define a variable now. Either we could define it in the local, innermost environment, or we could define it in the global, outermost environment. We will add functions to do both. We'll leave the <code>lenv_put</code> method the same. It can be used for definition in the local environment. But we'll add a new function <code>lenv_def</code> for definition in the global environment. This works by simply following the parent chain up before using <code>lval_put</code> to define locally.</p>
<pre><code data-language='c'>void lenv_def(lenv* e, lval* k, lval* v) {
/* Iterate till e has no parent */
while (e->par) { e = e->par; }
/* Put value in e */
lenv_put(e, k, v);
}</code></pre>
<p>At the moment this distinction may seem useless, but later on we will use it to write partial results of calculations to local variables inside a function. We should add another builtin for <em>local</em> assignment. We'll call this <code>put</code> in C, but give it the <code>=</code> symbol in Lisp. We can adapt our <code>builtin_def</code> function and re-use the common code, just like we do with our mathematical operators.</p>
<pre><code data-language='c'>lval* builtin_var(lenv* e, lval* a, char* func) {
LASSERT_TYPE(func, a, 0, LVAL_QEXPR);
lval* syms = a->cell[0];
for (int i = 0; i < syms->count; i++) {
LASSERT(a, (syms->cell[i]->type == LVAL_SYM),
"Function '%s' cannot define non-symbol. Got %s, Expected %s.",
func, ltype_name(syms->cell[i]->type), ltype_name(LVAL_SYM));
}
LASSERT(a, (syms->count == a->count-1),
"Function '%s' passed too many arguments for symbols. Got %i, Expected %i.",
func, syms->count, a->count-1);
for (int i = 0; i < syms->count; i++) {
/* If 'def' define in global scope. If 'put' define in local scope */
if (strcmp(func, "def") == 0) { lenv_def(e, syms->cell[i], a->cell[i+1]); }
if (strcmp(func, "=") == 0) { lenv_put(e, syms->cell[i], a->cell[i+1]); }
}
lval_del(a);
return lval_sexpr();
}
lval* builtin_def(lenv* e, lval* a) { return builtin_var(e, a, "def"); }
lval* builtin_put(lenv* e, lval* a) { return builtin_var(e, a, "="); }</code></pre>
<p>Then we need to register these as a builtins.</p>
<pre><code data-language='c'>lenv_add_builtin(e, "def", builtin_def);
lenv_add_builtin(e, "=", builtin_put);</code></pre>
<h2>Function Calling</h2> <hr/>
<p>We need to write the code that runs when an expression gets evaluated and an function <code>lval</code> is called.</p>
<p>When this function type is a builtin we can call it as before, using the function pointer, but we need to do something separate for our user defined functions. We need to bind each of the arguments passed in, to each of the symbols in the <code>formals</code> field. Once this is done we need to evaluate the <code>body</code> field, using the <code>env</code> field as an environment, and the calling environment as a parent.</p>
<p>A first attempt, without error checking, might look like this:</p>
<pre><code data-language='c'>lval* lval_call(lenv* e, lval* f, lval* a) {
/* If Builtin then simply call that */
if (f->builtin) { return f->builtin(e, a); }
/* Assign each argument to each formal in order */
for (int i = 0; i < a->count; i++) {
lenv_put(f->env, f->formals->cell[i], a->cell[i]);
}
lval_del(a);
/* Set the parent environment */
f->env->par = e;
/* Evaluate the body */
return builtin_eval(f->env, lval_add(lval_sexpr(), lval_copy(f->body)));
}</code></pre>
<p>This works fine providing all error conditions are met, but it doesn't deal correctly with the case where the number of arguments supplied, and the number of formal arguments required, differ. In this situation it will just crash.</p>
<p>Actually this is an interesting case, and leaves us a couple of options. We <em>could</em> just throw an error when the argument count supplied is incorrect, but we can do something more fun. When too few arguments are supplied we could instead bind the first few formal arguments of the function and then return it, leaving the rest unbound.</p>
<p>This creates a function that has been <em>partially evaluated</em> and reflects our previous idea of a function being some kind of <em>partial computation</em>. If we start with a function that takes two arguments, and pass in a single argument, we can bind this first argument and return a new function with its first formal argument bound, and its second remaining empty.</p>
<p>We can use this metaphor to create a cute image of how functions work. We can imagine each expression consisting of a function at the front which repeatedly consumes the input directly to its right. Once it has consumed the input to its right, if it is full (requires no more inputs) it evaluates and replaces itself with some value. If it needs more it replaced itself with another new, fuller function, with one of its variables bound and repeats the process.</p>
<p>So you can imagine functions like a little Pac-Man, not consuming all inputs at once, but iteratively eating inputs to the right and getting bigger and bigger until it is full. This isn't actually how we're going to implement it in code, but either way it is <em>fun to imagine</em>.</p>
<pre><code data-language='c'>lval* lval_call(lenv* e, lval* f, lval* a) {
/* If Builtin then simply apply that */
if (f->builtin) { return f->builtin(e, a); }
/* Record Argument Counts */
int given = a->count;
int total = f->formals->count;
/* While arguments still remain to be processed */
while (a->count) {
/* If we've ran out of formal arguments to bind */
if (f->formals->count == 0) {
lval_del(a); return lval_err("Function passed too many arguments. Got %i, Expected %i.", given, total);
}
/* Pop the first symbol from the formals */
lval* sym = lval_pop(f->formals, 0);
/* Pop the next argument from the list */
lval* val = lval_pop(a, 0);
/* Bind a copy into the function's environment */
lenv_put(f->env, sym, val);
/* Delete symbol and value */
lval_del(sym); lval_del(val);
}
/* Argument list is now bound so can be cleaned up */
lval_del(a);
/* If all formals have been bound evaluate */
if (f->formals->count == 0) {
/* Set Function Environment parent to current evaluation Environment */
f->env->par = e;
/* Evaluate and return */
return builtin_eval(f->env, lval_add(lval_sexpr(), lval_copy(f->body)));
} else {
/* Otherwise return partially evaluated function */
return lval_copy(f);
}
}</code></pre>
<p>The above function does exactly as we explained above, with correct error handling added in too. First it iterates over the passed in arguments attempting to place each one in the environment. Then it checks if the environment is full, and if so evaluates, otherwise returns a copy of itself with some arguments filled.</p>
<p>If we update our evaluation function <code>lval_eval_sexpr</code> to call <code>lval_call</code>, we can give our new system a spin.</p>
<pre><code data-language='c'>lval* f = lval_pop(v, 0);
if (f->type != LVAL_FUN) {
lval* err = lval_err(
"S-Expression starts with incorrect type. Got %s, Expected %s.",
ltype_name(f->type), ltype_name(LVAL_FUN));
lval_del(f); lval_del(v);
return err;
}
lval* result = lval_call(e, f, v);
</code></pre>
<p>Try defining some functions and test out how partial evaluation works.</p>
<pre><code data-language='lispy'>lispy> \ {x y} {+ x y}
(\ {x y} {+ x y})
lispy> (\ {x y} {+ x y}) 10 20
30
lispy> def {add-mul} (\ {x y} {+ x (* x y)})
()
lispy> add-mul 10 20
210
lispy> add-mul 10
(\ {y} {+ x (* x y)})
lispy> def {add-mul-10} (add-mul 10)
()
lispy> add-mul-10 50
510
lispy>
</code></pre>
<h2>Variable Arguments</h2> <hr/>
<p>We've defined some of our builtin functions so they can take in a variable number of arguments. Functions like <code>+</code> and <code>join</code> can take any number of arguments, and operator on them logically. We should find a way to let user defined functions work on multiple arguments also.</p>
<p>Unfortunately there isn't an elegant way for us to allow for this, without adding in some special syntax. So we're going to hard-code some system into our language using a special symbol <code>&</code>.</p>
<p>We are going to let users define formal arguments that look like <code>{x & xs}</code>, which means that a function will take in a single argument <code>x</code>, followed by zero or more other arguments, joined together into a list called <code>xs</code>. This is a bit like the ellipsis we used to declare variable arguments in C.</p>
<p>When assigning our formal arguments we're going look for a <code>&</code> symbol and if it exists take the next formal argument and assign it any remaining supplied arguments we've been passed. It's important we convert this argument list to a Q-Expression. We need to also remember to check that <code>&</code> is followed by a real symbol, and if it isn't we should throw an error.</p>
<p>Just after the first symbol is popped from the formals in the <code>while</code> loop of <code>lval_call</code> we can add this special case.</p>
<pre><code data-language='c'>/* Special Case to deal with '&' */
if (strcmp(sym->sym, "&") == 0) {
/* Ensure '&' is followed by another symbol */
if (f->formals->count != 1) {
lval_del(a);
return lval_err("Function format invalid. Symbol '&' not followed by single symbol.");
}
/* Next formal should be bound to remaining arguments */
lval* nsym = lval_pop(f->formals, 0);
lenv_put(f->env, nsym, builtin_list(e, a));
lval_del(sym); lval_del(nsym);
break;
}</code></pre>
<p>Suppose when calling the function the user doesn't supply any variable arguments, but only the first named ones. In this case we need to set the symbol following <code>&</code> to the empty list. Just after we delete the argument list, and before we check to see if all the formals have been evaluated add in this special case.</p>
<pre><code data-language='c'>/* If '&' remains in formal list it should be bound to empty list */
if (f->formals->count > 0 &&
strcmp(f->formals->cell[0]->sym, "&") == 0) {
/* Check to ensure that & is not passed invalidly. */
if (f->formals->count != 2) {
return lval_err("Function format invalid. Symbol '&' not followed by single symbol.");
}
/* Pop and delete '&' symbol */
lval_del(lval_pop(f->formals, 0));
/* Pop next symbol and create empty list */
lval* sym = lval_pop(f->formals, 0);
lval* val = lval_qexpr();
/* Bind to environment and delete */
lenv_put(f->env, sym, val);
lval_del(sym); lval_del(val);
}</code></pre>
<h2>Interesting Functions</h2> <hr/>
<h3>Function Definition</h3>
<p>Lambdas are clearly a simple and powerful way of defining functions. But the syntax is a little clumsy. There are a lot of brackets and symbols involved. Here is an interesting idea. We can try to write a function that defines a function itself, using in some way nicer syntax.</p>
<p>Essentially what we want is a function that can perform two steps at once. First creating a new function and then defining it some name. We could let the user supply the name and the formal arguments altogether and then separate these out for them and use them in the definition. Here is a function that does that. It takes as input some arguments and some body. It takes the head of the arguments to be the function name and the rest to be the formals to the function body.</p>
<pre><code data-language='lispy'>\ {args body} {def (head args) (\ (tail args) body)}</code></pre>
<p>We can name this function something like <code>fun</code> by passing it to <code>def</code> as usual.</p>
<pre><code data-language='lispy'>def {fun} (\ {args body} {def (head args) (\ (tail args) body)})</code></pre>
<p>This means that we can now define functions in a much simpler and nicer way. To define our previously mentioned <code>add-together</code> we can do the following. Functions that can define functions. That is certainly something we could never do in C. How cool is that!</p>
<pre><code data-language='lispy'>fun {add-together x y} {+ x y}</code></pre>
<div class='pull-right alert alert-warning' style="margin: 15px; text-align: center;">
<img src="/static/img/curry.png" alt="curry"/>
<p><small>Currying • Not as good as it sounds.</small></p>
</div>
<h3>Currying</h3>
<p>At the moment functions like <code>+</code> take a variable number of arguments. In some situations that's great, but what if we instead had a list of arguments we wished to pass it. In this situation it is rendered somewhat useless.</p>
<p>Again we can try to create a function to solve this problem. If we can create a list in the format we wish to use for our expression we can use <code>eval</code> to treat it as such. In the situation of <code>+</code> we could append this function to the front of the list and then perform the evaluation.</p>
<p>We can define a function <code>unpack</code> that does this. It takes as input some function and some list and appends the function to the front of the list, before evaluating it.</p>
<pre><code data-language='lispy'>fun {unpack f xs} {eval (join (list f) xs)}</code></pre>
<p>In some situations we might be faced with the opposite dilemma. We may have a function that takes as input some list, but we wish to call it using variable arguments. In this case the solution is even simpler. We use the fact that our <code>&</code> syntax for variable arguments packs up variable arguments into a list for us.</p>
<pre><code data-language='lispy'>fun {pack f & xs} {f xs}</code></pre>
<p>In some languages this is called <em>currying</em> and <em>uncurrying</em> respectively. This is named after <em>Haskell Curry</em> and unfortunately has nothing to do with our favourite spicy food.</p>
<pre><code data-language='lispy'>lispy> uncurry head 5 6 7
{5}
lispy> curry + {5 6 7}
18
</code></pre>
<p>Because of the way our <em>partial evaluation</em> works we don't need to think of <em>currying</em> with a specific set of arguments. We can think of functions themselves being in <em>curried</em> or <em>uncurried</em> form.
<pre><code data-language='lispy'>lispy> def {add-unpacked} (curry +)
()
lispy> add-unpacked {5 6 7}
18
</code></pre>
<p>Have a play around and see what other interesting and powerful functions you can try to come up with. You might be a bit limited for now. In the next chapter well add conditionals which will really start to make our language more complete. But that doesn't mean you won't be able to come up with some other interesting ideas now. Our Lisp really is getting richer!</p>
<h2>Reference</h2> <hr/>
<div class="panel-group alert alert-warning" id="accordion">
<div class="panel panel-default">
<div class="panel-heading">
<h4 class="panel-title">
<a data-toggle="collapse" data-parent="#accordion" href="#collapseOne">
functions.c
</a>
</h4>
</div>
<div id="collapseOne" class="panel-collapse collapse">
<div class="panel-body">
<pre><code data-language='c'>#include "mpc.h"
#ifdef _WIN32
static char buffer[2048];
char* readline(char* prompt) {
fputs("lispy> ", stdout);
fgets(buffer, 2048, stdin);
char* cpy = malloc(strlen(buffer)+1);
strcpy(cpy, buffer);
cpy[strlen(cpy)-1] = '\0';
return cpy;
}
void add_history(char* unused) {}
#else
#include <editline/readline.h>
#include <editline/history.h>
#endif
/* Forward Declarations */
struct lval;
struct lenv;
typedef struct lval lval;
typedef struct lenv lenv;
/* Lisp Value */
enum { LVAL_ERR, LVAL_NUM, LVAL_SYM, LVAL_FUN, LVAL_SEXPR, LVAL_QEXPR };
typedef lval*(*lbuiltin)(lenv*, lval*);
struct lval {
int type;
/* Basic */
long num;
char* err;
char* sym;
/* Function */
lbuiltin builtin;
lenv* env;
lval* formals;
lval* body;
/* Expression */
int count;
lval** cell;
};
lval* lval_num(long x) {
lval* v = malloc(sizeof(lval));
v->type = LVAL_NUM;
v->num = x;
return v;
}
lval* lval_err(char* fmt, ...) {
lval* v = malloc(sizeof(lval));
v->type = LVAL_ERR;
va_list va;
va_start(va, fmt);
v->err = malloc(512);
vsnprintf(v->err, 511, fmt, va);
v->err = realloc(v->err, strlen(v->err)+1);
va_end(va);
return v;
}
lval* lval_sym(char* s) {
lval* v = malloc(sizeof(lval));
v->type = LVAL_SYM;
v->sym = malloc(strlen(s) + 1);
strcpy(v->sym, s);
return v;
}
lval* lval_builtin(lbuiltin func) {
lval* v = malloc(sizeof(lval));
v->type = LVAL_FUN;
v->builtin = func;
return v;
}
lenv* lenv_new(void);
lval* lval_lambda(lval* formals, lval* body) {
lval* v = malloc(sizeof(lval));
v->type = LVAL_FUN;
/* Set Builtin to Null */
v->builtin = NULL;
/* Built new environment */
v->env = lenv_new();
/* Set Formals and Body */
v->formals = formals;
v->body = body;
return v;
}
lval* lval_sexpr(void) {
lval* v = malloc(sizeof(lval));
v->type = LVAL_SEXPR;
v->count = 0;
v->cell = NULL;
return v;
}
lval* lval_qexpr(void) {
lval* v = malloc(sizeof(lval));
v->type = LVAL_QEXPR;
v->count = 0;
v->cell = NULL;
return v;
}
void lenv_del(lenv* e);
void lval_del(lval* v) {
switch (v->type) {
case LVAL_NUM: break;
case LVAL_FUN:
if (!v->builtin) {
lenv_del(v->env);
lval_del(v->formals);
lval_del(v->body);
}
break;
case LVAL_ERR: free(v->err); break;
case LVAL_SYM: free(v->sym); break;
case LVAL_QEXPR:
case LVAL_SEXPR:
for (int i = 0; i < v->count; i++) {
lval_del(v->cell[i]);
}
free(v->cell);
break;
}
free(v);
}
lenv* lenv_copy(lenv* e);
lval* lval_copy(lval* v) {
lval* x = malloc(sizeof(lval));
x->type = v->type;
switch (v->type) {
case LVAL_FUN:
if (v->builtin) {
x->builtin = v->builtin;
} else {
x->builtin = NULL;
x->env = lenv_copy(v->env);
x->formals = lval_copy(v->formals);
x->body = lval_copy(v->body);
}
break;
case LVAL_NUM: x->num = v->num; break;
case LVAL_ERR: x->err = malloc(strlen(v->err) + 1); strcpy(x->err, v->err); break;
case LVAL_SYM: x->sym = malloc(strlen(v->sym) + 1); strcpy(x->sym, v->sym); break;
case LVAL_SEXPR:
case LVAL_QEXPR:
x->count = v->count;
x->cell = malloc(sizeof(lval*) * x->count);
for (int i = 0; i < x->count; i++) {
x->cell[i] = lval_copy(v->cell[i]);
}
break;
}
return x;
}
lval* lval_add(lval* v, lval* x) {
v->count++;
v->cell = realloc(v->cell, sizeof(lval*) * v->count);
v->cell[v->count-1] = x;
return v;
}
lval* lval_join(lval* x, lval* y) {
for (int i = 0; i < y->count; i++) {
x = lval_add(x, y->cell[i]);
}
free(y->cell);
free(y);
return x;
}
lval* lval_pop(lval* v, int i) {
lval* x = v->cell[i];
memmove(&v->cell[i], &v->cell[i+1], sizeof(lval*) * (v->count-i-1));
v->count--;
v->cell = realloc(v->cell, sizeof(lval*) * v->count);
return x;
}
lval* lval_take(lval* v, int i) {
lval* x = lval_pop(v, i);
lval_del(v);
return x;
}
void lval_print(lval* v);
void lval_print_expr(lval* v, char open, char close) {
putchar(open);
for (int i = 0; i < v->count; i++) {
lval_print(v->cell[i]);
if (i != (v->count-1)) {
putchar(' ');
}
}
putchar(close);
}
void lval_print(lval* v) {
switch (v->type) {
case LVAL_FUN:
if (v->builtin) {
printf("<builtin>");
} else {
printf("(\\ "); lval_print(v->formals); putchar(' '); lval_print(v->body); putchar(')');
}
break;
case LVAL_NUM: printf("%li", v->num); break;
case LVAL_ERR: printf("Error: %s", v->err); break;
case LVAL_SYM: printf("%s", v->sym); break;
case LVAL_SEXPR: lval_print_expr(v, '(', ')'); break;
case LVAL_QEXPR: lval_print_expr(v, '{', '}'); break;
}
}
void lval_println(lval* v) { lval_print(v); putchar('\n'); }
char* ltype_name(int t) {
switch(t) {
case LVAL_FUN: return "Function";
case LVAL_NUM: return "Number";
case LVAL_ERR: return "Error";
case LVAL_SYM: return "Symbol";
case LVAL_SEXPR: return "S-Expression";
case LVAL_QEXPR: return "Q-Expression";
default: return "Unknown";
}
}
/* Lisp Environment */
struct lenv {
lenv* par;
int count;
char** syms;
lval** vals;
};
lenv* lenv_new(void) {
lenv* e = malloc(sizeof(lenv));
e->par = NULL;
e->count = 0;
e->syms = NULL;
e->vals = NULL;
return e;
}
void lenv_del(lenv* e) {
for (int i = 0; i < e->count; i++) {
free(e->syms[i]);
lval_del(e->vals[i]);
}
free(e->syms);
free(e->vals);
free(e);
}
lenv* lenv_copy(lenv* e) {
lenv* n = malloc(sizeof(lenv));
n->par = e->par;
n->count = e->count;
n->syms = malloc(sizeof(char*) * n->count);
n->vals = malloc(sizeof(lval*) * n->count);
for (int i = 0; i < e->count; i++) {
n->syms[i] = malloc(strlen(e->syms[i]) + 1);
strcpy(n->syms[i], e->syms[i]);
n->vals[i] = lval_copy(e->vals[i]);
}
return n;
}
lval* lenv_get(lenv* e, lval* k) {
for (int i = 0; i < e->count; i++) {
if (strcmp(e->syms[i], k->sym) == 0) { return lval_copy(e->vals[i]); }
}
/* If no symbol check in parent otherwise error */
if (e->par) {
return lenv_get(e->par, k);
} else {
return lval_err("Unbound Symbol '%s'", k->sym);
}
}
void lenv_put(lenv* e, lval* k, lval* v) {
for (int i = 0; i < e->count; i++) {
if (strcmp(e->syms[i], k->sym) == 0) {
lval_del(e->vals[i]);
e->vals[i] = lval_copy(v);
e->syms[i] = realloc(e->syms[i], strlen(k->sym)+1);
strcpy(e->syms[i], k->sym);
return;
}
}
e->count++;
e->vals = realloc(e->vals, sizeof(lval*) * e->count);
e->syms = realloc(e->syms, sizeof(char*) * e->count);
e->vals[e->count-1] = lval_copy(v);
e->syms[e->count-1] = malloc(strlen(k->sym)+1);
strcpy(e->syms[e->count-1], k->sym);
}
void lenv_def(lenv* e, lval* k, lval* v) {
/* Iterate till e has no parent */
while (e->par) { e = e->par; }
/* Put value in e */
lenv_put(e, k, v);
}
/* Builtins */
#define LASSERT(args, cond, fmt, ...) \
if (!(cond)) { lval* err = lval_err(fmt, ##__VA_ARGS__); lval_del(args); return err; }
#define LASSERT_TYPE(func, args, index, expect) \
LASSERT(args, args->cell[index]->type == expect, \
"Function '%s' passed incorrect type for argument %i. Got %s, Expected %s.", \
func, index, ltype_name(args->cell[index]->type), ltype_name(expect))
#define LASSERT_NUM(func, args, num) \
LASSERT(args, args->count == num, \
"Function '%s' passed incorrect number of arguments. Got %i, Expected %i.", \
func, args->count, num)
#define LASSERT_NOT_EMPTY(func, args, index) \
LASSERT(args, args->cell[index]->count != 0, \
"Function '%s' passed {} for argument %i.", func, index);
lval* lval_eval(lenv* e, lval* v);
lval* builtin_lambda(lenv* e, lval* a) {
/* Check Two arguments, each of which are Q-Expressions */
LASSERT_NUM("\\", a, 2);
LASSERT_TYPE("\\", a, 0, LVAL_QEXPR);
LASSERT_TYPE("\\", a, 1, LVAL_QEXPR);
/* Check first Q-Expression contains only Symbols */
for (int i = 0; i < a->cell[0]->count; i++) {
LASSERT(a, (a->cell[0]->cell[i]->type == LVAL_SYM),
"Cannot define non-symbol. Got %s, Expected %s.",
ltype_name(a->cell[0]->cell[i]->type), ltype_name(LVAL_SYM));
}
/* Pop first two arguments and pass them to lval_lambda */
lval* formals = lval_pop(a, 0);
lval* body = lval_pop(a, 0);
lval_del(a);
return lval_lambda(formals, body);
}
lval* builtin_list(lenv* e, lval* a) {
a->type = LVAL_QEXPR;
return a;
}
lval* builtin_head(lenv* e, lval* a) {
LASSERT_NUM("head", a, 1);
LASSERT_TYPE("head", a, 0, LVAL_QEXPR);
LASSERT_NOT_EMPTY("head", a, 0);
lval* v = lval_take(a, 0);
while (v->count > 1) { lval_del(lval_pop(v, 1)); }
return v;
}
lval* builtin_tail(lenv* e, lval* a) {
LASSERT_NUM("tail", a, 1);
LASSERT_TYPE("tail", a, 0, LVAL_QEXPR);
LASSERT_NOT_EMPTY("tail", a, 0);
lval* v = lval_take(a, 0);
lval_del(lval_pop(v, 0));
return v;
}
lval* builtin_eval(lenv* e, lval* a) {
LASSERT_NUM("eval", a, 1);
LASSERT_TYPE("tail", a, 0, LVAL_QEXPR);
lval* x = lval_take(a, 0);
x->type = LVAL_SEXPR;
return lval_eval(e, x);
}
lval* builtin_join(lenv* e, lval* a) {
for (int i = 0; i < a->count; i++) { LASSERT_TYPE("join", a, i, LVAL_QEXPR); }
lval* x = lval_pop(a, 0);
while (a->count) {
lval* y = lval_pop(a, 0);
x = lval_join(x, y);
}
lval_del(a);
return x;
}
lval* builtin_op(lenv* e, lval* a, char* op) {
for (int i = 0; i < a->count; i++) { LASSERT_TYPE(op, a, i, LVAL_NUM); }
lval* x = lval_pop(a, 0);
if ((strcmp(op, "-") == 0) && a->count == 0) { x->num = -x->num; }
while (a->count > 0) {
lval* y = lval_pop(a, 0);
if (strcmp(op, "+") == 0) { x->num += y->num; }
if (strcmp(op, "-") == 0) { x->num -= y->num; }
if (strcmp(op, "*") == 0) { x->num *= y->num; }
if (strcmp(op, "/") == 0) {
if (y->num != 0) {
lval_del(x); lval_del(y); lval_del(a);
return lval_err("Division By Zero.");
}
x->num /= y->num;
}
lval_del(y);
}
lval_del(a);
return x;
}
lval* builtin_add(lenv* e, lval* a) { return builtin_op(e, a, "+"); }
lval* builtin_sub(lenv* e, lval* a) { return builtin_op(e, a, "-"); }
lval* builtin_mul(lenv* e, lval* a) { return builtin_op(e, a, "*"); }
lval* builtin_div(lenv* e, lval* a) { return builtin_op(e, a, "/"); }
lval* builtin_var(lenv* e, lval* a, char* func) {
LASSERT_TYPE(func, a, 0, LVAL_QEXPR);
lval* syms = a->cell[0];
for (int i = 0; i < syms->count; i++) {
LASSERT(a, (syms->cell[i]->type == LVAL_SYM),
"Function '%s' cannot define non-symbol. Got %s, Expected %s.",
func, ltype_name(syms->cell[i]->type), ltype_name(LVAL_SYM));
}
LASSERT(a, (syms->count == a->count-1),
"Function '%s' passed too many arguments for symbols. Got %i, Expected %i.",
func, syms->count, a->count-1);
for (int i = 0; i < syms->count; i++) {
/* If 'def' define in global scope. If 'put' define in local scope */
if (strcmp(func, "def") == 0) { lenv_def(e, syms->cell[i], a->cell[i+1]); }
if (strcmp(func, "=") == 0) { lenv_put(e, syms->cell[i], a->cell[i+1]); }
}