-
Notifications
You must be signed in to change notification settings - Fork 0
/
cmplutil.tex
3129 lines (2862 loc) · 156 KB
/
cmplutil.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
% --- Document setup ---------------------------------------------------
% NOTE(review): `sdpgf' is not a standard package -- presumably the
% author's own picture language used for the network diagrams below;
% confirm it is distributed alongside this source.
\documentclass{article}
\usepackage{sdpgf}
\usepackage{amsmath,amsfonts,amssymb}
\usepackage{tikz}
\usetikzlibrary{shapes.multipart,calc}
% Blackboard-bold shorthands for the natural numbers and the integers.
\newcommand{\N}{\mathbb{N}}
\newcommand{\Z}{\mathbb{Z}}
\usepackage{amsthm}
% Standard theorem environments, kept commented out for reference;
% only the `topic' environment below is actually used in this paper.
% \newtheorem{theorem}{Theorem}
% \newtheorem{lemma}[theorem]{Lemma}
% \newtheorem{corollary}[theorem]{Corollary}
% \newtheorem{conjecture}[theorem]{Conjecture}
% \theoremstyle{definition}
% \newtheorem{definition}[theorem]{Definition}
% \newtheorem{example}[theorem]{Example}
\theoremstyle{definition}
% `topic': numbered remarks ("Topics") carrying the paper's main
% technical observations.
\newtheorem{topic}{Topic}
% Logotype-style rendering of "Tcl", with a slight negative kern
% tucking the `cl' closer to the `T'.
\newcommand{\Tcl}{T\kern-0.1em cl}
% \word{text}: typeset `text' as an italic word enclosed in literal
% (printed) braces.  Mode-aware: the \ifmmode test selects brace and
% font commands appropriate to math or text mode, so the macro can be
% used in either.
\newcommand{\word}[1]{%
\relax
\ifmmode
\{\text{\normalfont\itshape #1}\}%
\else
$\{$\textnormal{\itshape #1}$\}$%
\fi
}
% \meta{text}: typeset `text' as an italic metavariable in angle
% brackets, in the style of syntax descriptions.  Mode-aware in the
% same way as \word above: works in both math and text mode.
\newcommand{\meta}[1]{%
\relax
\ifmmode
\langle\text{\normalfont\itshape #1}\rangle%
\else
$\langle$\textnormal{\itshape #1}$\rangle$%
\fi
}
% Regular-expression postfix operators (Kleene star, plus, optional),
% typeset as superscripts; \ensuremath lets them work in text too.
\newcommand{\regstar}{\ensuremath{^*}}
\newcommand{\regplus}{\ensuremath{^+}}
\newcommand{\regopt}{\ensuremath{^?}}
% Fallback \url definition for when no url/hyperref package is loaded;
% \providecommand leaves any existing definition untouched.
\providecommand{\url}[1]{\texttt{#1}}
% ttdescription: a description list whose item labels are set in
% typewriter type.  The \expandafter chain expands the old \makelabel
% once into the replacement text, so the new \makelabel wraps the
% previous label formatting around \texttt of the argument.
\newenvironment{ttdescription}{%
\description
\expandafter\def \expandafter\makelabel \expandafter##%
\expandafter1\expandafter{\makelabel{\texttt{##1}}}%
}{\enddescription}
\makeatletter
% \SMC: switch to the font size one step smaller than the currently
% selected size (compared via \ifx against \@currsize), used below for
% slightly-smaller acronyms such as PROP and DAG.  Declared robust so
% it survives in moving arguments.  Unrecognised sizes fall back to
% \small with a warning.
\DeclareRobustCommand\SMC{%
\ifx\@currsize\normalsize\small\else
\ifx\@currsize\small\footnotesize\else
\ifx\@currsize\footnotesize\scriptsize\else
\ifx\@currsize\large\normalsize\else
\ifx\@currsize\Large\large\else
\ifx\@currsize\LARGE\Large\else
\ifx\@currsize\scriptsize\tiny\else
\ifx\@currsize\tiny\tiny\else
\ifx\@currsize\huge\LARGE\else
\ifx\@currsize\Huge\huge\else
\small\SMC@unknown@warning
\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi
}
% NOTE(review): \TBWarning is a TUGboat (ltugboat class) macro and is
% not defined by the article class used here -- confirm it is provided
% somewhere, or replace with \GenericWarning/\@latex@warning.
\newcommand{\SMC@unknown@warning}{\TBWarning{\string\SMC: unrecognised
text font size command -- using \string\small}}
\makeatother
% \smaller{text}: text-font command applying \SMC, i.e. set the
% argument one font size smaller than the surrounding text.
\DeclareTextFontCommand{\smaller}{\SMC}
% \DeclareTextFontCommand{\smalltexttt}{\SMC\ttfamily}
% \newcommand{\PROP}{\texorpdfstring{\smaller{PROP}}{PROP}}
% \PROP / \PROPs: the term "PROP" set slightly smaller than the
% surrounding text; \PROPs appends a full-size plural `s'.
\newcommand{\PROP}{\smaller{PROP}}
\newcommand{\PROPs}{\PROP s}
% Catcode switches: \makebraceother demotes { and } to ordinary
% (catcode 12) characters for verbatim-like input; \makebracegroups
% restores them as begin-/end-group (catcodes 1 and 2).  These only
% affect material tokenised after the switch takes effect.
\newcommand{\makebraceother}{%
\catcode`\{=12\catcode`\}=12\relax
}
\newcommand{\makebracegroups}{%
\catcode`\{=1\catcode`\}=2\relax
}
% Hyphenation exception for a word TeX's patterns handle poorly.
\hyphenation{white-space}
\begin{document}
\title{Network rewriting utility description}
\author{Lars Hellstr\"om}
\maketitle
\begin{abstract}
This paper describes, as a tool used in scientific exploration,
the author's computer program for doing network rewriting
calculations.
\end{abstract}
\section{Introduction}
\label{Sec:Introduction}
The git repository at \url{https://github.com/lars-hellstrom/algebra}
contains a variety of packages I have written over the years, to
facilitate research-related computations within, in particular, higher
algebra. Despite this codebase not having been created with one
particular goal in mind, quite a lot of it comes together in one
program: the \emph{network rewriting completion utility}.
In spirit, this program is quite similar to the seminal
Knuth--Bendix completion utility~\cite{KnuthBendix}, but with the
important difference that it works with \emph{networks} (a kind of
Directed Acyclic Graph) rather than treelike \emph{terms} as the
objects of rewriting. This permits exploring many novel algebraic
theories, for example that of bi- and Hopf algebras, that defy even
being formulated within the language of classical term rewriting due
to the presence of co-operations such as $\Delta$ and $\varepsilon$.
On the one hand this transition from trees to \smaller{DAG}s is a
radical proposal that requires rebuilding from scratch most of the
mathematical formula language, but on the other hand the overall
structure of the computation is the familiar completion of a rewrite
system, and the abstract theory of that remains applicable.
Without already getting into the technicalities, the purpose of the
program can be characterised as \emph{theory exploration}. It starts
from three pieces of information:
\begin{enumerate}
\item
a \emph{signature} listing the operations (and co-operations) in
the theory---for example a hom-algebra\footnote{
See for example \cite{HMS} for an introduction to hom-algebras
and references to further literature.
} has one binary operation
called `multiplication' and one unary operation known as `the
hom'---which determines the language of expressions in the theory
to explore,
\item
the \emph{axioms} of the theory, in the form of a set of given
equalities \(l = r\) between expressions in the theory,
\item
an \emph{ordering} of the expressions in the theory, which is
used to orient equalities into rewrite rules---the smaller side
in an equality is considered to be ``simpler'' as in `more
simplified'.
\end{enumerate}
One intermediate operation the program can use this information for
is to \emph{reduce} arbitrary expressions in the theory by rewriting
them to a ``maximally simplified'' \emph{normal form}, by applying
known equalities of expressions. A higher level operation is to seek
new \emph{nontrivial} equalities by constructing expressions that can
be simplified in several different ways (\emph{ambiguities}), and then
checking whether both lead to the same normal form; if not, the
program has discovered a lemma which it adds to its database of known
equalities in the theory under consideration. New equalities give
rise to new reductions and potentially new ambiguities, which in turn
may produce new lemmas; this \emph{completion} process need not
terminate. Therefore the user would typically start the program, let
it run for a while, and then inspect what lemmas have been
discovered; there are several interfaces for this
(Subsection~\ref{Ssec:Introspection}). The results can be
exported, likewise in several forms. The completion process can
be stopped and resumed at a later time, should the user so desire.
As a concrete example, if one wishes to explore the theory of
hom-associative algebras using this program then one would feed it
\[
\text{the signature }
\Omega = \left\{
\begin{sdpgf}{0}{0}{30}{-62}{0.4pt}
\m 15 -60 \L 0 21 \S \m 9 -25 \C -5 5 4 10 0 7 \S \m 21 -25 \C 5
5 -3 10 0 7 \S \ov 7 -39 16 16 \S
\end{sdpgf}
,
\begin{sdpgf}{0}{0}{30}{-62}{0.4pt}
\m 15 -60 \L 0 21 \S \m 15 -23 \L 0 20 \S \re 7 -39 16 16 \S
\end{sdpgf}
\right\}
\quad\text{with axiom}\quad
\left[ \begin{sdpgf}{0}{0}{120}{-196}{0.2pt}
\m 60 -191 \L 0 41 \S \m 30 -46 \L 0 41 \S \m 79 -51 \C -12 12 -7
18 0 16 \S \m 101 -51 \C 14 14 -25 17 0 15 \S \m 49 -123 \C -12 12
-7 17 0 16 \S \m 71 -123 \C 12 12 7 17 0 16 \S \ov 44 -150 32 32
\S \re 14 -78 32 32 \S \ov 74 -78 32 32 \S
\end{sdpgf} \right]
\equiv
\left[ \begin{sdpgf}{0}{0}{120}{-196}{0.2pt}
\m 60 -191 \L 0 41 \S \m 19 -51 \C -14 14 25 17 0 15 \S \m 41 -51
\C 12 12 7 18 0 16 \S \m 90 -46 \L 0 41 \S \m 49 -123 \C -12 12 -7
17 0 16 \S \m 71 -123 \C 12 12 7 17 0 16 \S \ov 44 -150 32 32 \S
\ov 14 -78 32 32 \S \re 74 -78 32 32 \S
\end{sdpgf} \right]
\]
where however it should be observed that the exact representation of
these things is a nontrivial matter (which we shall return to in
Subsections~\ref{Ssec:PureNetwork} and~\ref{Ssec:Database}). The first
lemma proved is
\begin{displaymath}
\left[ \begin{sdpgf}{0}{0}{75}{-134}{0.3pt}
\m 53 -59 \C 0 7 7 6 0 7 \S \m 66 -25 \C 7 7 -13 8 0 7 \S \m
38 -132 \L 0 21 \S \m 32 -97 \C -6 5 -3 9 0 8 \S \m 17 -61 \C
-7 6 -2 10 0 9 \L 0 10 \C 0 8 7 7 0 8 \S \m 43 -97 \C 6 5 4 9
0 8 \S \m 30 -23 \L 0 20 \S \m 28 -61 \C 6 5 -4 9 0 8 \S \m 54
-25 \C -6 6 -3 8 0 8 \S \ov 30 -111 16 16 \S \ov 15 -75 16 16
\S \re 22 -39 16 16 \S \ov 52 -39 16 16 \S \re 45 -75 16 16 \S
\end{sdpgf} \right]
\equiv
\left[ \begin{sdpgf}{0}{0}{75}{-134}{0.3pt}
\m 53 -59 \C 0 7 7 6 0 7
\S \m 60 -23 \L 0 20 \S \m 38 -132 \L 0 21 \S \m 32 -97
\C -6 5 -3 9 0 8 \S \m 17 -61 \C -7 6 -2 10 0 9 \L 0 10
\C 0 8 7 7 0 8 \S \m 43 -97 \C 6 5 4 9 0 8 \S \m 24 -25
\C -7 7 13 8 0 7 \S \m 28 -61 \C 6 5 -4 9 0 8 \S \m 36 -25
\C 6 6 3 8 0 8 \S \ov 30 -111 16 16 \S \ov 15 -75 16 16 \S
\ov 22 -39 16 16 \S \re 52 -39 16 16 \S \re 45 -75 16 16 \S
\end{sdpgf} \right]
\text{,}
\end{displaymath}
and the proof is merely
\begin{displaymath}
\left[ \begin{sdpgf}{0}{0}{75}{-134}{0.3pt}
\m 53 -59 \C 0 7 7 6 0 7 \S \m 66 -25 \C 7 7 -13 8 0 7 \S \m
38 -132 \L 0 21 \S \m 32 -97 \C -6 5 -3 9 0 8 \S \m 17 -61 \C
-7 6 -2 10 0 9 \L 0 10 \C 0 8 7 7 0 8 \S \m 43 -97 \C 6 5 4 9
0 8 \S \m 30 -23 \L 0 20 \S \m 28 -61 \C 6 5 -4 9 0 8 \S \m 54
-25 \C -6 6 -3 8 0 8 \S \ov 30 -111 16 16 \S \ov 15 -75 16 16
\S \re 22 -39 16 16 \S \ov 52 -39 16 16 \S \re 45 -75 16 16 \S
\end{sdpgf} \right]
\equiv
\left[ \begin{sdpgf}{0}{0}{75}{-134}{0.3pt} \m 58 -61 \C 6 5 -4 9 0 8
\S \m 66 -25 \C 7 7 -13 8 0 7 \S \m 38 -132 \L 0 21 \S \m 32 -97
\C -6 5 -3 9 0 8 \S \m 23 -59 \C 0 9 -15 5 0 9 \L 0 10
\C 0 8 7 7 0 8 \S \m 43 -97 \C 6 5 4 9 0 8 \S \m 30 -23 \L 0 20
\S \m 47 -61 \C -7 6 -10 7 0 9 \S \m 54 -25 \C -6 6 -3 8 0 8 \S
\ov 30 -111 16 16 \S \re 15 -75 16 16 \S \re 22 -39 16 16 \S
\ov 52 -39 16 16 \S \ov 45 -75 16 16 \S \end{sdpgf} \right]
\equiv
\left[ \begin{sdpgf}{0}{0}{75}{-134}{0.3pt} \m 58 -61 \C 6 5 -4 9 0 8
\S \m 60 -23 \L 0 20 \S \m 38 -132 \L 0 21 \S \m 32 -97
\C -6 5 -3 9 0 8 \S \m 23 -59 \C 0 9 -15 5 0 9 \L 0 10
\C 0 8 7 7 0 8 \S \m 43 -97 \C 6 5 4 9 0 8 \S \m 24 -25
\C -7 7 13 8 0 7 \S \m 47 -61 \C -7 6 -10 7 0 9 \S \m 36 -25
\C 6 6 3 8 0 8 \S \ov 30 -111 16 16 \S \re 15 -75 16 16 \S
\ov 22 -39 16 16 \S \re 52 -39 16 16 \S \ov 45 -75 16 16 \S
\end{sdpgf} \right]
\equiv
\left[ \begin{sdpgf}{0}{0}{75}{-134}{0.3pt} \m 53 -59 \C 0 7 7 6 0 7
\S \m 60 -23 \L 0 20 \S \m 38 -132 \L 0 21 \S \m 32 -97
\C -6 5 -3 9 0 8 \S \m 17 -61 \C -7 6 -2 10 0 9 \L 0 10
\C 0 8 7 7 0 8 \S \m 43 -97 \C 6 5 4 9 0 8 \S \m 24 -25
\C -7 7 13 8 0 7 \S \m 28 -61 \C 6 5 -4 9 0 8 \S \m 36 -25
\C 6 6 3 8 0 8 \S \ov 30 -111 16 16 \S \ov 15 -75 16 16 \S
\ov 22 -39 16 16 \S \re 52 -39 16 16 \S \re 45 -75 16 16 \S
\end{sdpgf} \right]
\text{.}
\end{displaymath}
The program discovers this by first constructing the second of these
steps as the site of an ambiguity---an expression which can be
rewritten in two different ways---and then reducing either to a
normal form:
\begin{displaymath}
\left[ \begin{sdpgf}{0}{0}{75}{-134}{0.3pt}
\m 53 -59 \C 0 7 7 6 0 7 \S \m 66 -25 \C 7 7 -13 8 0 7 \S \m
38 -132 \L 0 21 \S \m 32 -97 \C -6 5 -3 9 0 8 \S \m 17 -61 \C
-7 6 -2 10 0 9 \L 0 10 \C 0 8 7 7 0 8 \S \m 43 -97 \C 6 5 4 9
0 8 \S \m 30 -23 \L 0 20 \S \m 28 -61 \C 6 5 -4 9 0 8 \S \m 54
-25 \C -6 6 -3 8 0 8 \S \ov 30 -111 16 16 \S \ov 15 -75 16 16
\S \re 22 -39 16 16 \S \ov 52 -39 16 16 \S \re 45 -75 16 16 \S
\end{sdpgf} \right]
\leftarrow
\left[ \begin{sdpgf}{0}{0}{75}{-134}{0.3pt} \m 58 -61 \C 6 5 -4 9 0 8
\S \m 66 -25 \C 7 7 -13 8 0 7 \S \m 38 -132 \L 0 21 \S \m 32 -97
\C -6 5 -3 9 0 8 \S \m 23 -59 \C 0 9 -15 5 0 9 \L 0 10
\C 0 8 7 7 0 8 \S \m 43 -97 \C 6 5 4 9 0 8 \S \m 30 -23 \L 0 20
\S \m 47 -61 \C -7 6 -10 7 0 9 \S \m 54 -25 \C -6 6 -3 8 0 8 \S
\ov 30 -111 16 16 \S \re 15 -75 16 16 \S \re 22 -39 16 16 \S
\ov 52 -39 16 16 \S \ov 45 -75 16 16 \S \end{sdpgf} \right]
\rightarrow
\left[ \begin{sdpgf}{0}{0}{75}{-134}{0.3pt} \m 58 -61 \C 6 5 -4 9 0 8
\S \m 60 -23 \L 0 20 \S \m 38 -132 \L 0 21 \S \m 32 -97
\C -6 5 -3 9 0 8 \S \m 23 -59 \C 0 9 -15 5 0 9 \L 0 10
\C 0 8 7 7 0 8 \S \m 43 -97 \C 6 5 4 9 0 8 \S \m 24 -25
\C -7 7 13 8 0 7 \S \m 47 -61 \C -7 6 -10 7 0 9 \S \m 36 -25
\C 6 6 3 8 0 8 \S \ov 30 -111 16 16 \S \re 15 -75 16 16 \S
\ov 22 -39 16 16 \S \re 52 -39 16 16 \S \ov 45 -75 16 16 \S
\end{sdpgf} \right]
\rightarrow
\left[ \begin{sdpgf}{0}{0}{75}{-134}{0.3pt} \m 53 -59 \C 0 7 7 6 0 7
\S \m 60 -23 \L 0 20 \S \m 38 -132 \L 0 21 \S \m 32 -97
\C -6 5 -3 9 0 8 \S \m 17 -61 \C -7 6 -2 10 0 9 \L 0 10
\C 0 8 7 7 0 8 \S \m 43 -97 \C 6 5 4 9 0 8 \S \m 24 -25
\C -7 7 13 8 0 7 \S \m 28 -61 \C 6 5 -4 9 0 8 \S \m 36 -25
\C 6 6 3 8 0 8 \S \ov 30 -111 16 16 \S \ov 15 -75 16 16 \S
\ov 22 -39 16 16 \S \re 52 -39 16 16 \S \re 45 -75 16 16 \S
\end{sdpgf} \right]
\text{.}
\end{displaymath}
The two outermost steps here are not equal, but they are equivalent
modulo the given axiom despite both being normal forms with respect
to the rewrite rule
\begin{math}
\left[ \begin{sdpgf}{0}{0}{120}{-196}{0.15pt}
\m 60 -191 \L 0 41 \S \m 30 -46 \L 0 41 \S \m 79 -51 \C -12 12 -7
18 0 16 \S \m 101 -51 \C 14 14 -25 17 0 15 \S \m 49 -123 \C -12 12
-7 17 0 16 \S \m 71 -123 \C 12 12 7 17 0 16 \S \ov 44 -150 32 32
\S \re 14 -78 32 32 \S \ov 74 -78 32 32 \S
\end{sdpgf} \right]
\rightarrow
\left[ \begin{sdpgf}{0}{0}{120}{-196}{0.15pt}
\m 60 -191 \L 0 41 \S \m 19 -51 \C -14 14 25 17 0 15 \S \m 41 -51
\C 12 12 7 18 0 16 \S \m 90 -46 \L 0 41 \S \m 49 -123 \C -12 12 -7
17 0 16 \S \m 71 -123 \C 12 12 7 17 0 16 \S \ov 44 -150 32 32 \S
\ov 14 -78 32 32 \S \re 74 -78 32 32 \S
\end{sdpgf} \right]
\end{math},
so apparently their equivalence is a nontrivial consequence of the
axiom, and thus worth turning into a lemma, which in turn gives rise to
a new rewrite rule, that on the one hand changes which expressions are
normal forms and on the other hand gets involved in additional
ambiguities. Thus the cycle goes on.
Mathematically, the hardness of the results produced by the program
varies depending on what kind of results these are. The individual
lemmas are quite solid, as the program records all steps taken in
deriving them and can convert these into an explicit proof. Results
on the quotient by the given axioms typically require more manual
work to be rigorously established; basic rewriting theory
conditions such conclusions upon having a confluent rewrite system,
which one would only know after running the completion procedure to
termination, however for fundamental logical reasons (it would decide
the Halting Problem) that is not always possible, and in practice it
also can be infeasible. None the less it can be possible to draw
conclusions from incomplete calculations, for example if they suggest
a verifiable conjecture about what the completed rewrite system would
look like~\cite{NR2}, or if it can be shown that sufficiently small
normal forms will remain so forever since any lemmas which still
remain to be discovered are all too large to have an effect on
them~\cite{HMS}.
\medskip
Since this is a \emph{tool description} rather than a traditional
mathematics paper, there are no theorems proved, nor any mathematical
results formally stated. What there is that rises above the main body
text are a number of remarks, labelled ``Topics'', which address some
specific issue that was encountered during the development of the
utility. These constitute the main contribution of this paper to the
mathematical literature, in the sense that they should be of use to
anyone wishing to separately reproduce functionality of the rewriting
utility; we should bear in mind that reproducibility is a scientific
virtue.
This paper is \emph{not} a user's manual for the rewriting utility,
even if it at times mentions details on how a user might accomplish
some operation. The utility is not yet in such a polished form that
one could hope to make use of it without knowledge of its internal
composition.
\section{Program composition}
\label{Sec:Program}
By the trivial lines-of-code metric, the completion utility in 2015
consisted of 11931 lines of Tcl~code, but that is perhaps most useful
as a datapoint if comparing several different pieces of software. For
a party actually interested in \emph{reading} the program, it is more
relevant to know that it is a Literate~\cite{Knuth:Literate} program
written in the \textsf{doc}\slash \textsf{docstrip}
tradition~\cite{Mittelbach:TB10-2-245} (like the \LaTeX\ kernel and
most \LaTeX\ packages), and that the sources therefore can be typeset
straight off by \LaTeX. At the time of writing, doing that to the two
main source files produces documents of~212 and~149 pages,
respectively.
The above 11931~lines of code figure also includes code from various
supporting packages of more generic utility. The typeset literate
sources here could add another \(154 + 36 + 83 + 41 = 314\) or so pages
to the total (and there are additional bodies of code under
development that could eventually become additional supporting
packages), but understanding the workings of the completion utility
would for the most part not require understanding the workings of
those supporting packages. Conversely, rigorously understanding the
operation of the utility as a whole probably does require familiarity
with the underlying mathematical theory of network rewriting, which
as presented in~\cite{NR1} adds another 188~pages to the reading
list; that paper began as the opening section of the completion
utility literate sources, but was split off since it grew a bit
beyond the initial expectations.
\subsection{Development history}
The completion utility originated in work carried out during my
postdoc year at the Mittag-Leffler Institute (2003--04,
Noncommutative Geometry), specifically the realisation that the
\emph{Network Algebra} of \c{S}tef\u{a}nescu~\cite{Stefanescu} could
be used as a formalism for expressions in Hopf algebras. While in
hindsight not that spectacular as a find---shorthand diagrams had
been known for decades to specialists in the field, even if often
downplayed as ``not really formulae''---the high degree of formality
of \c{S}tef\u{a}nescu's networks made it clear that these could serve
as building blocks for a universal-algebraic study of Hopf algebras
(and other algebraic structures with co-operations). Would it for
example be possible to start with just the axioms and by pure
calculations (as opposed to by peeking at the group theory proof that
\((ab)^{-1} = b^{-1} a^{-1}\)) discover the fact that the antipode is
an antihomomorphism? Initial attempts were hindered by the detail
that networks, although excellent for showing the structure of
expressions, are not as compact as traditional formulae; sometimes a
mere four steps would fill up an entire sheet of paper! What to do if
one does not want to be on a constant hunt for more area to write on?
Erasable media? Well, the screen of a computer can be redrawn several
times a second, so that's a neat way forward.
\textbf{Version~0} of the program was written over the summer of
2004, and had at best an ambiguous aim. On the one hand it did perform
completion of e.g.~the Hopf algebra axioms, but on the other hand it
is perhaps more fair to describe the rewrite operations on networks
as its primary accomplishment, and the completion merely as a way of
generating a stream of tasks on which to test these more basic
operations. Notably the program ran at the speed of one rewrite step
per second and would display each intermediate result as it did so,
to give the user a chance to supervise what was being done and verify
its correctness.
Operations that were implemented included searching for instances of
one network as a subnetwork of another, replacing such an instance by
a different network, testing whether two networks are equal\slash
isomorphic, construction of ambiguities (enumerating all ways in
which two networks can overlap), and dropping rules that become the
larger part of an inclusion ambiguity. An operation not implemented
in any rigorous way was that of comparing two networks to decide the
orientation of a new rule---this decision was typically left to the
user, making the completion semi-automated at best. As a curiosity,
it can also be mentioned that the completion process was run through
the windowing system event loop: each ambiguity would get its own
toplevel window, calculations were carried out in the window that
currently had focus, and once an ambiguity had been resolved the
corresponding window would be closed (thus yielding focus to the next
window).
About half of the program, and even more of the development effort,
was spent on the graphical rendering of networks; graph drawing in
general is a nontrivial problem, and networks have special
requirements in that it is preferred that vertices connected to each
other in a particular way also are positioned correspondingly.
General graph drawing heuristics were tried but found to be mostly
unsuitable, and in the end positions were found by running a sort of
simulation where ``tensions'' in the edges would cause vertices to
move to more appropriate positions. Since such vertex and edge
positions were expensive to compute, they were being stored as part
of the network data structure.
An outright mathematical flaw of Version~0 was that it for certain
inputs would produce non-acyclic networks. This prompted the
introduction of ``feedbacks'' in the formalism, but can also be
viewed as the first sign of wrap ambiguities~\cite{LH:IWC}.
\textbf{Version~1} was a thorough rewrite begun in~2006. It split the
utility into one library (\texttt{network2}) of operations
specifically on networks and a program (\texttt{cmplutil1}) whose aim
was clearly that of completion. Work was still dispatched via the
windowing system event loop as each step continued to be shown
graphically as it was performed, but now the program actually could
order networks according to a mathematically rigorous criterion.
In the library portion, the network datatype was split into several.
The core datatype is that of a \emph{pure} network which implements
the formal mathematical concept~\cite[Def.~5.1]{NR1} of a network,
whereas the graphical information is moved to a datatype of
\emph{rich} networks wrapped around the pure ones. In addition there
is a new concept of \emph{network with feedback} which combines a
pure network with a nominal transference~\cite[Def.~6.14]{NR1}; the
latter is needed to ensure that rewrite operations preserve
acyclicity.
Most of the old code for assigning positions to vertices and edges
was scrapped, and a variety of new heuristics based on generating a
layout were implemented; sensible (even if not always optimal)
layouts could be generated as needed, so there was no longer any need
for long-term storage of the graphical information with the networks.
Old code for exporting presentations could instead be migrated to the
new datatypes, thereby preserving all existing features.
Over time the library also grew to include rigorous operations for
ordering networks, as appropriate mathematics was being developed;
late in the development cycle it was discovered that much of this
could be greatly simplified.
\textbf{Version~2} became operational in 2009 and is a rewrite of the
completion utility employing the same network operations library as
version~1. Major novelties include:
\begin{itemize}
\item
using an SQLite3 database for storing all rules and other aspects
of the state of the completion,
\item
capable of running without a \smaller{GUI},
\item
web server interface for viewing state of computations, and
\item
expressions can be formal linear combinations of networks.
\end{itemize}
It is primarily this version that is described in
Section~\ref{Sec:CompletionUtil} below.
\subsection{Implementation language}
Tcl~\cite{Ousterhout} is a quite simple language, with several traits
that are attractive from a mathematical rigor point of view, but
since it is not one of the major languages for scientific computing or
in computer science, it warrants being elaborated upon.
First and foremost, \Tcl\ requires that every value has a well-defined
serialisa\-tion\hspace*{0pt}---the so-called
\emph{string representation}---and that
the semantics of a value are determined entirely by its string
representation; this is a principle known as \emph{everything is a
string}~(\smaller{EIAS}). This does not require that all values are
also stored as strings; ever since \Tcl~8.0~(1997) values can also
have an \emph{internal representation} native to the computing
hardware, and many values in a running \Tcl\ program are born~(created),
live~(accessed), and die~(deallocated) without the string
representation ever being realised, since it is sufficient that the
\emph{potential} to realise it always exists. \smaller{EIAS} does
however imply that values are \emph{immutable}---once created,
guaranteed to permanently have the same string representation---since
the outcome of a test whether a value is equal to a specific literal
string constant must depend only on the string representation of the
value, not on details of how it is represented in memory or what
might have previously been done to it. This is an excellent match for
the Platonic ideal of mathematical objects existing (at least
\emph{in potentia}) unchangeable and forever, beyond the material
realm.
The above does not mean \Tcl\ only has constants. A \emph{variable}
is a location where a value can be stored, and it is perfectly fine
to change which value is stored in a variable. There are even
primitive operations which modify the contents of a variable precisely
in that they change a specific part (e.g.~one element of a list) of
the value this variable holds, and if the variable does not share the
data structure storing that value with anything else (which it would
do if that value is assigned also to a second variable) then this data
structure can be modified in place; otherwise the modifying operation
makes a shallow copy of the original value and modifies this copy
before assigning it to the variable. These semantics of immutable
values---that seemingly mutating operations really make shallow
copies---are important for correctly interpreting some of the more
intricate operations in the completion utility (in particular those
that perform combinatorial searches); a naive port of these algorithms
to languages with mutable value semantics is likely to malfunction.
Second, \Tcl\ has a very simple syntax. A \Tcl\ program, or
\emph{script}, consists of a sequence of \emph{sentences}.\footnote{
The official documentation uses the term `command' for this, but
also for the first word of a sentence; I'm using `sentence' to
avoid this ambiguity. Likewise what I call a \emph{sentence prefix}
later---a list of values meant to become the initial words of a
sentence---is normally called a `command prefix'.
} Sentences usually appear one per line (newline is a sentence
separator), although one may put several on a single line by separating
them with semicolons. A sentence is in turn a sequence of
\emph{words}, separated by whitespace. The first word of a sentence
is the name of the \emph{command} performed by that sentence, and the
remaining words constitute the \emph{arguments} of that command. What
allows this to be a structured programming language is that words in
turn may be anything, including entire scripts, if appropriately
delimited; a control structure (e.g.~for loop) is simply a command
which expects some of its arguments to be entire scripts themselves.
Words that begin with a left brace `\verb|{|' and end with the matching
right brace `\verb|}|' denote the exact sequence of characters between
(but not including) these two delimiters, regardless of whether these
characters would include newlines, other whitespace, or semicolons;
this makes it trivial to nest ``blocks'' of \Tcl\ code as single words
of sentences, to an arbitrary depth. A word without delimiters (a
\emph{bareword}) can contain whitespace and semicolons, but only if
each special character is individually escaped by a backslash
character `\verb|\|'; this is unusual. Finally, a word beginning with
a quote character `\verb|"|' ends with the next (unescaped) quote
character, and can likewise contain whitespace (even newlines) and
semicolons without individual escaping, but here nesting is
impractical, so quote-delimited words are mostly used for text
strings.
What gets passed to a command for processing is always the runtime
value of a word. Brace-delimited words have their explicit constant
values, but other words undergo \emph{substitution} before they
become a command argument; most commonly the entire value of a word
comes from a single substitution, though in quote-delimited words it
is not uncommon that constant and variable pieces are combined when
forming the runtime value, e.g.
\begin{verbatim}
log message "Element $i is: [lindex $L $i]"
\end{verbatim}
The first substitution form is
\emph{variable substitution}: a dollar sign \verb|$| followed by the
name of a variable is replaced by the current value of that variable.
The second substitution form is \emph{command substitution}:
everything from a left bracket \verb|[| up to the matching right
bracket \verb|]| is interpreted as a (sequence of) \Tcl\ sentence(s),
that gets evaluated, and the value returned by the command of the
(final) sentence upon invocation is what becomes the runtime value of
this bracket construction. As a somewhat silly example, the sentence
\begin{verbatim}
list 1 [set i 2] $i i [incr i] $i [incr i]
\end{verbatim}
will have the argument list `\texttt{1 2 2 i 3 3 4}' (seven
arguments) passed to the \texttt{list} command. The first word has
\texttt{1} as explicit constant value. The second word is a command
substitution, and the \texttt{set} command there primarily sets the
variable \texttt{i} to \texttt{2} (a side-effect), but \texttt{set}
also returns the new variable value, so this word has runtime value
\texttt{2}. The third word \verb|$i| is a straight variable
substitution, and since we know \texttt{i} is now \texttt{2}, that will
again be the runtime value also of this word. The fourth word
\texttt{i} has nothing to trigger substitution, so its runtime value
is just \texttt{i} (name of a variable, rather than its value). The
fifth word \texttt{[incr~i]} triggers a command substitution
(uncharacteristically, again with a command \texttt{incr} whose
primary purpose is to achieve a side-effect, namely to increment the
value of the variable \texttt{i}), so the value of \texttt{i} steps
up to \texttt{3} and this is also the runtime value of this word. The
sixth word \verb|$i| likewise reports this new \texttt{i} value of
\texttt{3}, before another command substitution in the last word
increments \texttt{i} again, to the final value of \texttt{4}.
Thus briefed on the language syntax, we are ready to analyse an
example illustrating the first point about immutable values and
shallow copying of internal representations. The script
\begin{verbatim}
set row [list]
for {set k 0} {$k<$n} {incr k} {
lappend row 0
}
set Z [list]
for {set k 0} {$k<$n} {incr k} {
lappend Z $row
}
\end{verbatim}
sets variable \verb|Z| to the $n \times n$ zero matrix (not in the
easiest way possible, but in a way that follows common imperative
style)---the first four lines make \verb|row| a list of $n$ zeroes
by appending one $0$ to this list at each iteration of the loop, and
then the last four lines make \verb|Z| a list of $n$ such rows of zeroes
by appending the value of \verb|row| to \verb|Z| at each iteration of
the second loop. The generated data structure only has one $0$ value
(referenced $n$ times), one row-of-$n$-zeroes value (referencing the
zero $n$ times, and referenced $n$ times from the matrix value), and
one matrix (list-of-lists) value.
\begin{figure}
\[
\begin{tikzpicture}[tclobj/.style={shape=rectangle split,
rectangle split parts=#1, rectangle split horizontal, draw,
anchor=west},tclobj/.default=2,
pointer/.style={rounded corners=4pt}]
\def\partmid#1{($(#1 south)!0.5!(#1 north)$)}
\node[tclobj,name=0] at (0,2) {integer\nodepart{two}$0$};
\node[tclobj=4,name=row] at (0,1) {list};
\draw[pointer] \partmid{row.two} |- (0,1.5);
\draw[pointer] \partmid{row.three} |- (0,1.5);
\draw[pointer,->] \partmid{row.four} |- (-0.4,1.5) |- (0.west);
\node[tclobj=4,name=Z] at (0,0) {list};
\draw[pointer] \partmid{Z.two} |- (0,0.5);
\draw[pointer] \partmid{Z.three} |- (0,0.5);
\draw[pointer,->] \partmid{Z.four} |- (-0.4,0.5) |- (row.west);
\node[tclobj,name=1] at (8,1) {integer\nodepart{two}$1$};
\node[tclobj=4,name=I] at (3,-0.5) {list};
\node[tclobj=4,name=I3] at (5,0) {list};
\node[tclobj=4,name=I2] at (5,1) {list};
\node[tclobj=4,name=I1] at (5,2) {list};
\draw[pointer,->] \partmid{I.two} |- (I1.west);
\draw[pointer,->] \partmid{I.three} |- (I2.west);
\draw[pointer,->] \partmid{I.four} |- (I3.west);
\draw[pointer,->] \partmid{I1.two} |- (7.5,1.6) |- (1.west);
\draw[pointer] \partmid{I1.three} |- (5,2.5) -| (3,1.5) -- (0,1.5);
\draw[pointer] \partmid{I1.four} |- (5,2.5);
\draw[pointer] \partmid{I2.two} |- (5,1.5);
\draw[pointer] \partmid{I2.three} |- (7.5,0.6) |- (1.west);
\draw[pointer] \partmid{I2.four} |- (2.8,1.5);
\draw[pointer] \partmid{I3.two} |- (5,0.5);
\draw[pointer] \partmid{I3.three} |- (5,0.5) -| (3,1.5) -- (0,1.5);
\draw[pointer] \partmid{I3.four} |- (7.5,-0.4) |- (1.west);
\draw[pointer,->] (1,-1.5) node[below] {variable \texttt{Z}}
|- (-0.4,-1) |- (Z.west);
\draw[pointer,->] (3,-1.5) node[below] {variable \texttt{I}}
|- (2.4,-1) |- (I.west);
\end{tikzpicture}
\]
\caption{Sharing of internal representations}
\end{figure}
However, if we were to make an
identity matrix \verb|I| from \verb|Z| by changing the diagonal
elements to $1$s through the commands
\begin{verbatim}
set I $Z
for {set k 0} {$k<$n} {incr k} {
lset I $k $k 1
}
\end{verbatim}
(that \texttt{lset} does \(I_{k,k} := 1\)) then at each iteration of
the loop the targeted row of \verb|I| will be copied before its $k$th
element is changed to $1$, so the data structure storing the value of
\verb|I| ends up with $n$ separate rows (as they need to be, since their
values are distinct).
There is still only one $0$ value, but now it is referenced $n^2$
times ($n-1$ times by each of the $n$ rows in \verb$I$, and another
$n$ times by the only row in \verb$Z$, which of course remains
unchanged), and the $1$ value is referenced $n$ times (once by each
row of \verb$I$).
% The reason for explaining this in such detail is that in many other
% programming languages, the default for such modifying operations is
% rather to directly overwrite the original data, which would have
% drastically different results: first the changes made to $I$ would be
% seen also in $Z$, and if $Z$ was still cleverly reusing the same
% row-of-zeroes $n$ times, then the result of the above could very well
% be that $I$ (and $Z$) end up as the $n \times n$ matrices of all $1$s
% (since each element in the only row has been separately overwritten
% with that value). Correctly understanding the semantics of values
% being immutable is crucial for some of the more intricate algorithms
% in the completion utility; in particular the search-for-subnetwork
% and list-all-overlaps operations rely heavily upon being able to
% (cheaply) preserve the current state of a search by pushing the
% values of the relevant variables onto a stack.
\subsubsection{Syntax and invariant descriptions}
Much of the practical syntax of \Tcl\ ends up being syntaxes of
individual commands, and accordingly the official language
documentation~\cite{tcl-lang-doc-8.6} is organised as one manpage per
command. Some of these (e.g.~\verb|if|, \verb|try|) have rather complex
clause-based syntaxes whereas others are more
function-and-its-arguments (or verb-object-object-object-\dots\ if
you want to go natural-language grammatical), but common to all is
that the division into words is given. Accordingly, it seems appropriate
to stress when some unit constitutes a word, so in what follows the
familiar \meta{bar} notation for a metasyntactic variable is refined
to \word{bar} when this unit is additionally to be exactly one word
in a \Tcl\ sentence; the more generic \meta{bar} would still be used
for part of a word or a sequence of (zero or more) words.
Pragmatically, it is also convenient to make use of certain notations
from regular expressions---such as the \regopt, \regstar, and
\regplus\ quantifiers---to express repetition variation in command
syntaxes. For example
\begin{quote}
\ttfamily
list \word{element}\regstar\\
incr \word{variable} \word{amount}\regopt
\end{quote}
means the \texttt{list} command takes zero or more \word{element}s,
whereas the \texttt{incr} command takes a \word{variable} name and
optionally also an \word{amount} by which to increment it.
With parentheses to group pieces in such syntax expressions, the
syntax of \verb|if| may be stated as
\begin{quote}
\ttfamily\raggedright
if \word{condition} then\regopt\ \word{script}
$\bigl($elseif \word{condition} then\regopt\
\word{script}$\bigr)\regstar$ $\bigl($else\regopt\
\word{script}$\bigr)\regopt$
\end{quote}
showing not only that the final \texttt{else} clause is optional, but
also that it may be preceded by zero or more \texttt{elseif}
clauses, and that the keywords \texttt{then} and \texttt{else} are
optional.
These syntax conventions are in the completion utility sources
applied not only to document individual commands, but also to
document data structures, since the string representations of \Tcl\
lists are parsed into elements according to the same rules as \Tcl\
sentences are parsed into words; the main difference is that variable
and command substitutions do not occur when parsing a list. Below it
is the application of these conventions to lists and other data
structures that is of more scientific interest, as data formats should
be documented for posterity even if the programs that generated them
may become obsolete.
To give an example, the string representation of a dictionary is
\begin{quote}
\ttfamily
$\bigl($\word{key} \word{value}$\bigr)\regstar$
\end{quote}
so the same as a list with an even number of elements, alternatingly
having the roles of key and value. As a matter of general computer
science, a \emph{dictionary} (associative array) is a sparse mapping
of \word{key}s to \word{value}s. \Tcl\ implements dictionaries with a
hashtable internal representation to achieve average $O(1)$
complexity for accessing individual elements; key equality is string
equality.
Documentation may also need to state invariants and other properties
of the data structures considered, and for this it rather becomes
appropriate to mix \Tcl\ code with ordinary mathematical formulae,
when notation for some necessary operation exists only in one but
not in the other. To signal what is what, \Tcl\ elements of such
mixed formulae will appear in \texttt{typewriter} font and typically
be delimited by command substitution brackets \texttt{[]}, whereas
standard mathematical elements use normal formula fonts (e.g.~math
italic). A trivial example is
\[
\text{\ttfamily [lindex [list $a$ $b$ $c$] $1$]} = b
\]
where three values $a$, $b$, and $c$ are given to the \texttt{list}
command to build a list with those three as elements. Then this list
is passed to the \texttt{lindex} command that goes on to extract the
element in position $1$ from this list; since \Tcl\ indexing is
$0$-based, that will be the second element $b$. Finally at the top
level of this mixed formula we assert mathematical equality $=$ of the
value returned by that \texttt{lindex} to $b$. The standard
mathematical formula language lacks good notation for this kind of
construction and indexing operations---we rather prefer to give names
to all elements we need to access---and programming languages
conversely have little notation for stating claims, as programs are
mainly about giving orders.
\section{The network datatype library}
The informal specification for a network is that it should be like a
term (in the logic sense, i.e., a formalised expression), except that
the underlying combinatorial structure should be that of a directed
acyclic graph~(\smaller{DAG}) rather than a tree, because it needs to
accommodate co-operations as well as ordinary operations. Formalising
that does however require sorting out exactly what sense of `tree' we
will generalise.
The tree structure of a term can be defined as the tree which as
vertices has the subterms of the term and links two vertices with an
edge if one is a maximal proper subterm of the other, but for our
purposes it is more convenient to take as vertices the
\emph{operations} (function symbols, including as nullary operations
any named constants); this turns the tree into a kind of data-flow
diagram (Figure~\ref{Fig:TermTrees&network}, middle).
\begin{figure}
\[
\begin{tabular}{c@{\qquad}c@{\qquad}c}
\begin{tikzpicture}
\node[draw] (r1c1) at (0,2) {$x$};
\node[draw] (r1c2) at (1,2) {$y$};
\node[draw] (r1c3) at (2,2) {$z$};
\node[draw] (r2c12) at (0.5,1) {$f(x,y)$};
\node[draw] (r2c3) at (2,1) {$g(z)$};
\node[draw] (r3c2) at (1,0) {$f\bigl( f(x,y), g(z) \bigr)$};
\draw (r1c1) -- (r2c12)
(r1c2) -- (r2c12)
(r2c12) -- (r3c2)
(r1c3) -- (r2c3) -- (r3c2);
\end{tikzpicture}%
&
\begin{tikzpicture}[>=latex]
\node (r1c1) at (0,2) {$x$};
\node (r1c2) at (1,2) {$y$};
\node (r1c3) at (1.7,2) {$z$};
\node[draw,circle] (r2c12) at (0.5,1) {$\mathsf{f}$};
\node[draw,circle] (r2c3) at (1.7,1) {$\mathsf{g}$};
\node[draw,circle] (r3c2) at (1.1,0) {$\mathsf{f}$};
\draw[->] (r1c1) to[out=-90,in=150] (r2c12);
\draw[->] (r1c2) to[out=-90,in=30] (r2c12);
\draw[->] (r2c12) to[out=-90,in=150] (r3c2);
\draw[->] (r1c3) -- (r2c3);
\draw[->] (r2c3) to[out=-90,in=30] (r3c2);
\draw[->] (r3c2) -- ++(0,-0.65);
\end{tikzpicture}%
&
% \begin{tikzpicture}
% \node (r1c1) at (0,2) {$x$};
% \node (r1c2) at (1,2) {$y$};
% \node (r1c3) at (2,2) {$z$};
% \node[draw,circle] (r2c12) at (0.5,1) {$\mathsf{f}$};
% \node[draw,circle] (r2c3) at (2,1) {$\mathsf{\Delta}$};
% \node[draw,circle] (r3c2) at (1,0) {$\mathsf{f}$};
% \node[draw,circle] (r3c3) at (2.5,0) {$\mathsf{g}$};
% \draw (r1c1) to[out=-90,in=150] (r2c12)
% (r1c2) to[out=-90,in=30] (r2c12)
% (r2c12) to[out=-90,in=135] (r3c2)
% (r1c3) -- (r2c3)
% (r2c3) to[out=-135,in=45] (r3c2)
% (r3c2) -- ++(0,-0.65)
% (r2c3) to[out=-45,in=90] (r3c3)
% (r3c3) -- ++(0,-0.65);
% \end{tikzpicture}%
\begin{tikzpicture}[>=latex]
\node (r1c1) at (0,2) {$x$};
\node (r1c2) at (1,2) {$y$};
\node (r1c3) at (2,2) {$z$};
\node[draw,circle] (r2c1) at (0,1) {$\mathsf{g}$};
\node[draw,circle] (r2c2) at (1,1) {$\mathsf{g}$};
\node[draw,circle] (r2c3) at (2,1) {$\mathsf{\Delta}$};
\node[draw,circle] (r3c2) at (0.7,0) {$\mathsf{f}$};
\node[draw,circle] (r3c3) at (2,0) {$\mathsf{f}$};
\draw[->] (r1c1) -- (r2c1);
\draw[->] (r1c2) -- (r2c2);
\draw[->] (r1c3) -- (r2c3);
\draw[->] (r2c1) to[out=-90,in=150] (r3c2);
\draw[->] (r2c2) to[out=-90,in=150] (r3c3);
\draw[->] (r2c3) -- (r3c2);
\draw[->] (r2c3) to[out=-30,in=30] (r3c3);
\draw[->] (r3c2) -- ++(0,-0.65);
\draw[->] (r3c3) -- ++(0,-0.65);
\end{tikzpicture}%
\\
subexpressions tree& operations tree&
network
\end{tabular}
\]
\caption{Term trees and a network}
\label{Fig:TermTrees&network}
\end{figure}
The reason this structure for an ordinary term will be a rooted tree
is that each operation syntactically produces exactly one result, but
that is not the case with co-operations; paths taken by data within
the expression may branch as well as join, creating a general
\smaller{DAG} rather than a tree. The problem of how to even
interpret such combinatorial objects as expressions is nontrivial,
but turns out to have a natural solution~\cite[Sec.~5]{NR1}.
It is important to note that already an operations tree needs more
structure than just the tree graph to encode a term. For one thing,
there are the operation symbols \emph{decorating} the vertices, but
more importantly the children of an operation are not interchangeable
(unless that operation is commutative, but this is not a property
that would be encoded into the syntax of a formula)---instead each
incoming edge is attached to a separate \emph{port} of the vertex,
and the set of available ports depends on the operation symbol; for
example a binary operation has one `left operand' port and a
separate `right operand' port. In a network the set of outgoing ports
similarly depends on the operation symbol, so that for example a
vertex for the coproduct $\mathsf{\Delta}$ has one incoming port but
two outgoing ports. Finally networks are \emph{open graphs} in that
they can have external edges signifying output (results) from a
network and input (arguments) to it. In comparison to traditional
terms the incoming external edges assume the role of free variables,
which has interesting consequences for the rewrite theory: rewrite
linearity becomes a syntactic requirement (although having
co-operations allows for effectively reintroducing nonlinearity via
explicit rules) and unification is no longer a primitive operation.
\subsection{Pure networks}
\label{Ssec:PureNetwork}
Mathematically, \cite{NR1}~defines a network as a tuple
\[
G = (V,E,h,g,t,s,D)
\]
where \(\N \supset V \supseteq \{0,1\}\) and \(E \subset \N\) are the
sets of vertices and edges of the underlying directed acyclic graph
$(V,E,h,t)$ in which \(h,t\colon E \longrightarrow V\) are the
functions mapping every edge $e$ to its head $h(e)$ and tail $t(e)$
respectively. To turn this into an open graph, the two mandatory
vertices $0$ and $1$ are fixed as representing the output and input
sides respectively of a network; an edge $e$ is outgoing external if
it has \(h(e)=0\) and incoming external if it has \(t(e)=1\).
Requiring that vertices and edges (or technically: the \emph{labels}
of vertices and edges) are all natural numbers has certain
set-theoretic advantages for defining isomorphism classes of
networks, which are technically the objects that are being rewritten.
The two functions \(g,s\colon E \longrightarrow \Z_{>0}\) map an edge
(label) to the head and tail respectively \emph{index}, which
identify the ports to which the edge is attached. Finally $D$ with a
domain of $V \setminus \{0,1\}$ is the function mapping inner
vertices to the operation symbols by which these are decorated. Such
a symbol $x$ is given with an \emph{arity} $\alpha(x)$ and a
\emph{coarity} $\omega(x)$ that are equal to the in-degree and
out-degree respectively of any vertex decorated with~$x$. One also
speaks about arity $\alpha(G)$ and coarity $\omega(G)$ of an entire
network; these are the numbers of inputs and outputs to the network
$G$ as a whole (thus technically the \emph{out}-degree of $1$ and
\emph{in}-degree of $0$, respectively).
This mathematical concept is implemented in the network library as the
datatype of \emph{pure networks}.
For ease of access, pure networks are implemented as heterogeneous
nested lists, where some nesting levels act as records whereas others
act as arrays. At the top level there is simply a pair
\begin{quote}
\word{vertices} \word{edges}
\end{quote}
where \word{vertices} and \word{edges} are both lists indexed by