-
Notifications
You must be signed in to change notification settings - Fork 0
/
cmplutil2.dtx
9148 lines (9133 loc) · 324 KB
/
cmplutil2.dtx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
%
% \iffalse (driver)
%<*driver>
\documentclass{tclldoc}
\usepackage{amsmath,amssymb,amsfonts,amsthm}
% \newtheorem{lemma}{Lemma}[section]
% \theoremstyle{definition}
% \newtheorem{definition}[lemma]{Definition}
% \newtheorem{example}[lemma]{Example}
% \theoremstyle{remark}
% \newtheorem*{remark}{Remark}
% \newtheorem*{algsketch}{Algorithm sketch}
\PageIndex
\CodelineNumbered
\setcounter{IndexColumns}{2}
% A description-style list whose item labels are typeset in upright
% typewriter type; used below for the tables of one-character st4te
% codes.
\newenvironment{ttdescription}{%
\description
\def\makelabel##1{\hspace\labelsep\normalfont\ttfamily ##1}%
}{\enddescription}
\makeatletter
% \describecolumn/*+[<table>]{<column>} marks up and indexes a database
% column name.  The grabbed arguments are:
%   #1 = / flag: when \BooleanTrue, index entries are filed as "usage"
%        rather than "main";
%   #2 = * flag: when \BooleanTrue, the marginal print of the column
%        name is suppressed;
%   #3 = + flag: when \BooleanTrue, \texttt{<column>} is also typeset
%        inline (and the following \ignorespaces is gobbled);
%   #4 = optional table name (may be \NoValue);
%   #5 = the column name.
% Two index entries are made when a table name is given: one sorted
% under the column and one under the table.  \meta is redefined locally
% so meta-arguments in #5 sort sensibly in the index.
\NewDescribeCommand{\describecolumn}{%
\XD@grab@sarg{/}\XD@grab@sarg{*}\XD@grab@sarg{+}%
\XD@grab@harmless@oarg\XD@grab@harmless{}%
}{5}{%
\ifx #2\BooleanFalse
\GenericDescribePrint{%
\MacroFont #5 \normalfont(column)%
}%
\fi
\begingroup
\def\meta##1{(##1)}%
\unrestored@protected@xdef\@gtempa{#5}%
\endgroup
\IndexEntry{%
\LevelSorted{\@gtempa}{\texttt{#5} column}%
\ifx \NoValue#4\@empty \else
\LevelSorted{#4}{\texttt{#4} table}%
\fi
}{\ifx #1\BooleanTrue usage\else main\fi}{\thepage}%
\ifx \NoValue#4\@empty \else
\IndexEntry{%
\LevelSorted{#4}{\texttt{#4} (table)}%
\LevelSorted{\@gtempa}{\texttt{#5} column}%
}{\ifx #1\BooleanTrue usage\else main\fi}{\thepage}%
\fi
\ifx #3\BooleanTrue
\texttt{#5}%
\expandafter\@gobble % Eats \ignorespaces
\fi
}
\makeatother
\makeatletter
% \SMC switches to the font size one step below the current one
% (TUGboat-style; used here for typesetting acronyms such as \PROP).
% It dispatches on \@currsize; \tiny maps to itself since there is
% nothing smaller, and an unrecognised size falls back to \small with
% a warning.
\DeclareRobustCommand\SMC{%
\ifx\@currsize\normalsize\small\else
\ifx\@currsize\small\footnotesize\else
\ifx\@currsize\footnotesize\scriptsize\else
\ifx\@currsize\large\normalsize\else
\ifx\@currsize\Large\large\else
\ifx\@currsize\LARGE\Large\else
\ifx\@currsize\scriptsize\tiny\else
\ifx\@currsize\tiny\tiny\else
\ifx\@currsize\huge\LARGE\else
\ifx\@currsize\Huge\huge\else
\small\SMC@unknown@warning
\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi
}
% Fallback warning for the dispatch above.  NOTE(review): \TBWarning
% originates in the ltugboat class -- presumably supplied by the
% tclldoc document class here; confirm it is defined.
\newcommand{\SMC@unknown@warning}{\TBWarning{\string\SMC: unrecognised
text font size command -- using \string\small}}
\makeatother
% Acronyms set with \SMC (one size smaller than surrounding text).
\newcommand{\PROP}{{\SMC PROP}}
\newcommand{\PROPs}{{\SMC PROP}s}
\newcommand{\PRO}{{\SMC PRO}}
\newcommand{\ISBN}{{\SMC ISBN}}
\newcommand{\ISBNX}{{\SMC X}}
% Em-dash with thin spaces; \Dash allows a break after, \Ldash before.
\providecommand*{\Dash}{%
\hspace*{0.166667em}\textemdash\hspace{0.166667em}%
}
\providecommand*{\Ldash}{%
\hspace{0.166667em}\textemdash\hspace*{0.166667em}%
}
% En-dash followed by a legal (zero-width) break point.
\providecommand*{\dash}{\textendash\hspace{0pt}}
% Markup shorthands.
\newcommand{\package}[1]{\textsf{#1}}
\newcommand{\Tcl}{\Tcllogo}
\newcommand*{\DefOrd}[2][]{\textbf{#2}}
\newcommand*{\emDefOrd}[2][]{\emph{#2}}
% Mathematical notation shorthands.
\newcommand{\mc}{\mathcal}
\newcommand{\N}{\mathbb{N}}
\newcommand{\Zp}{\mathbb{Z}_{>0}}
\newcommand{\Fpil}{\longrightarrow}
\newcommand{\cross}[2]{{}^{#1}\mathsf{X}^{#2}}
\newcommand{\same}[1]{\mathsf{I}^{#1}}
\begin{document}
\DocInput{cmplutil2.dtx}
\PrintIndex
\end{document}
%</driver>
% \fi
%
% \title{Another \PROP py completion utility}
% \author{Lars Hellstr\"om}
% \date{2009-03-17--}
% \maketitle
%
% \begin{abstract}
% This is a full-featured utility for completing a set of
% \PROP py identities. The main innovation compared to its
% predecessor is that the rules and ambiguities are kept in
% an external database rather than RAM, but it also adds support
% for non-monomial identities and improved handling of undirectable
% equalities.
% \end{abstract}
%
% \tableofcontents
%
%
% \subsection*{Docstrip modules}
%
% There is no main delimiter around the code in this file, but
% several modules that choose code variants.
%
%
% \subsubsection*{Algorithm variants}
%
% The primary point of variance is the
% \describestring*+[proc]{pick_next_ambiguity} procedure, which is
% called to start processing of the next ambiguity. Not surprisingly
% the variability here includes that of selection strategy, but there
% is also a more fundamental variance that concerns when (and if)
% certain things are done, that may be described as \emph{styles} of the
% algorithm.
%
% The basic style is to employ a straightforward implementation
% of Buchberger's algorithm, storing the full queue of ``critical pairs''
% as rows with state |A| in the \describestring*+[table]{am6iguities}
% table, and weeding it by use of the ``second criterion'' whenever a
% new rule is discovered.
%
% The basic style also has a cautious mode of operations, which
% is activated by setting the \describestring+[var.]{am6iguity_top}
% variable to something else than its default |infinity| value. In
% this mode, only ambiguities at or below this top value are allowed
% into the |am6iguities| table; larger ambiguities are instead kept
% in the more compact |pa1rs| table until a raise in |am6iguity_top|
% allows them back into the |am6iguities| table.
%
% A problem with the basic style is that a lot of time is spent
% weeding the table of ambiguities\Dash this leads to congestion in
% the transfer of data from database to memory. The \Module{lazy} style
% is instead that Buchberger's second criterion is applied as the
% first step when an ambiguity has been picked for resolution.
%
% The \Module{lazier} style attacks a different problem common to the
% basic and lazy styles, namely that overlaps were sought as soon as
% a pair of rules could be formed; since the number of pairs is
% quadratic in the number of rules, this meant much more time was
% spent forming overlaps than was spent using or discovering rules,
% which is problematic when the aim is to discover new identities.
% The \Module{lazier} style is instead to merely estimate how an
% ambiguity between two rules would rank, and delay actually
% computing it until it reaches ambiguities of that rank.
%
% In practice this means that both the |am6iguities| and |pa1rs| tables
% are treated as queues, and each |pick_next_ambiguity| call picks an
% item from the queue whose head has lowest rank. If a pair is picked
% then any ambiguities formed from it go into the ambiguity table
% (where they will abide their time if the rank turned out to be
% higher than expected), whereas if an ambiguity is picked then it
% will be resolved.
%
% The \Module{BB2} module guards (in the case of \Module{lazier})
% the code for the `second criterion' prereduction step. It was made
% optional since it turned out to almost never apply in that setting.
%
% There are several modules which attempt to control the order in
% which ambiguities are processed; not specifying any of them means
% they are processed in an arbitrary order. The best supported module
% is \Module{vsquare}, since no code variant employing a |pa1rs|
% table has been tested with anything else. The full list is:
% \begin{itemize}
% \item
% \Module{classic}\Dash order primarily by |m1nus|, secondarily
% by |p1us|. This was (pretty much) the mode of operation in
% \textsf{cmplutil1}.
% \item
% \Module{oldnew}\Dash order by the sum of |m1nus| and |p1us|.
% \item
% \Module{pcols}\Dash order by the sum of all profile columns
% (requires \Module{aprofile}); this is the same as the number of
% inner vertices and edges in the ambiguity site.
% \item
% \Module{vcols}\Dash order by the sum of all profile columns for
% vertices (requires \Module{aprofile}); this is the same as the
% number of inner vertices in the ambiguity site.
% \item
% \Module{vsum}\Dash order by the sum, over all terms in the
% first step of the |re5olution|, of the orders of the networks.
% The idea is that the terms of the resolution are more like the
% terms of the rule that it might give rise to than the ambiguity
% site is, so this should help ``small'' rules be discovered more
% quickly.
% \item
% \Module{vsquare}\Dash order by the sum, over all terms in the
% first step of the |re5olution|, of the \emph{squares} of the
% orders of the networks. The idea is that this provides a better
% balance between the order of the largest term and orders of
% other terms than does a straightforward sum of the orders.
% \end{itemize}
% ToDo: Implement ordering by some norm (e.g.~the Frobenius norm) of
% the matrix representation of the first biaffine \PROP\ element used
% to compare networks; this is probably the closest one can get to
% the standard strategy.
%
% The \Module{axiomsfirst} module modifies the initial processing (in
% some problem initialisations) to process all entered congruences
% before looking at pairs. Whether it takes effect also depends on
% having modules \Module{lazier} and \Module{vsquare}.
%
%
% \subsubsection*{Data structure variants}
%
% The \Module{zlib} module causes some data to be compressed before
% it is stored in the database; it is independent of other variants.
%
% The \Module{aprofile} module adds profile columns and an index of
% those to the \describestring*+[table]{am6iguities} table.
%
% The \Module{pairmap} module adds the |pa1rsMap| table and causes
% ``large'' pairs (i.e., those that wouldn't be processed for quite
% some time) to go into this more compact table, rather than the
% basic |pa1rs| table.
%
% \begin{variable}{code_variants}
% The |code_variants| variable is a list of those \textsf{docstrip}
% options which affect how (and what) data are stored in the database
% and which are true for this instance of the utility.
% \begin{tcl}
# Record which data-representation-affecting docstrip modules are
# active in this build; compared against the value cached in the
# le9end of an existing database before opening it.
set code_variants {}
%<zlib>lappend code_variants zlib
%<aprofile>lappend code_variants aprofile
%<pairmap>lappend code_variants pairmap
% \end{tcl}
% This list can be compared against the cached value stored in a
% database to determine if opening the database would be reliable.
% \end{variable}
%
%
% \subsubsection*{Problem initialisation modules}
%
% The \Module{Frobenius} module contains all code initialising the
% utility with (one axiom system of) a Frobenius algebra. The
% submodule \Module{coproduct} guards a congruence defining the
% coproduct.
%
% The \Module{Hopf} module contains all code initialising the utility
% with the axioms of a Hopf algebra or variation thereof. It has
% several submodules:
% \begin{itemize}
% \item
% The \Module{twist} module modifies the definitions to be for
% ``Hopf algebra with abstract twist'', i.e., the distributive
% law employs an abstract twist vertex rather than an explicit
% permutation. In particular this extends the partial order
% with $4$ extra blocks. \Module{untwist} is a submodule of
% \Module{twist} that adds an extra vertex whose only given
% property is that it is the inverse of the twist vertex.
% \item
% The \Module{cocomplex} module adds two constant vertices |Re|
% and |Im|, and axioms for their interaction with |Delta| and
% |epsilon| that give them the behaviour of the real-part and
% imaginary-part maps in the coalgebra dual to the complex
% numbers as a real algebra.
% \item
% The \Module{testequalities} module adds some congruences that
% can be derived anyway, but only after several hundred steps.
% As the name hints, they were intended for testing the handling
% of equalities, but subsequent refinements of the ordering have
% actually made them orientable.
% \item
% The \Module{moretwist} module adds axioms about the |twist|.
% It is hoped that these will actually turn out to be implied by
% the basic set of axioms, but if that is true then the proof
% would have to be rather large.
% \item
% The \Module{division} module adds a |div| vertex, drops
% all axioms about |m| and |S| vertices, and adds instead some
% axioms about |div| which are derived from a formalisation of
% group that takes division as the primitive operation. The idea
% of this was mainly to see how difficult it is to derive the
% ordinary group axioms from the ones for division, but since the
% division axioms are not linear (and less linear than the
% ordinary group axioms), it follows of course that one needs to
% make use of the coproduct and counit to duplicate or destroy
% input values as necessary, and then the best one could hope for
% would be a Hopf algebra.
% \end{itemize}
% The module \Module{hopfbase} only contains the vertex definitions
% and ordering, but no congruences. It is meant as a starting point
% for experimentation.
%
% The \Module{HomAss} module initialises the utility with the axioms
% of a hom-associative algebra or variation thereof. The
% \Module{aishom} submodule adds the congruence stating that the
% unary operation is a homomorphism of the algebra.
%
% The \Module{WeakAss} module initialises the utility with a ``weak
% associative'' axiom.
%
% Common to the \Module{HomAss} and \Module{WeakAss} modules is a
% (fairly large) block of code which, in coarity~$1$ settings only,
% can produce estimates of the growth of the operad by computing a
% finite tree automaton for the language of irreducible networks.
%
% The \Module{Transposition} module sets up an axiom system for a
% vertex to be an abstract transposition: it is its own inverse and
% satisfies the Yang--Baxter equation. The main challenge here is to
% come up with an ordering that actually manages to orient some
% derived equalities.
%
%
% \subsubsection*{Other}
%
% The \Module{stats} module contains code that makes the main loop
% keep track of which procedures it calls: how many times and the
% accumulated runtime (in microseconds).
%
% The \Module{Tk} module contains code that gives the utility a GUI.
% \textbf{Note} that many example modules contain a call to
% |tk_getSaveFile|, which means they too have a Tk dependency.
%
% The \Module{testserver} module contains code that starts a HTTP
% miniserver at port 9090. When that server is asked to GET a path
% that begins with what looks like a rule number, then it delivers an
% XHTML+SVG rendering of that rule (with proof).
%
% The \Module{trans} module brackets some code that makes each
% creation of a new rule a separate transaction. The primary reason
% for this was to speed up the database access (collect many small
% transactions in one big), but the transaction overhead seems to
% have been rather small.
%
% The \Module{splitnewrule} module splits the basic |new_ru1e|
% procedure in smaller pieces, to improve GUI responsiveness.
%
%
%
% \section{Data structures}
%
% The main data structures are the list of rules and the list of
% ambiguities. Both of these are for size reasons kept in a database
% (accessed through the TDBC interface), as experience has shown
% that they have a tendency to grow beyond what is practical for
% \Tcl\ to keep in memory. It turns out that also certain search
% operations benefit from this arrangement, as the database machinery
% can perform a rather effective initial thinning of possibilities to
% consider, if provided with the necessary information.
% A potential problem with putting things in a database is however
% the use of SQL, as implementations of this language are rather
% varied; in particular any English word \emph{might} turn out to
% have been seized for syntactic purposes, and thus not work as the
% name of a column. Therefore the names which really \emph{are} English
% words have been obfuscated through the substitution of digits for
% one or several letters, as one sometimes sees in Leet orthography.
%
% Accordingly, the table of rules is named
% \describestring+[table]{ru1es} and the table of ambiguities is
% named \describestring+[table]{am6iguities}.
% Both rules and ambiguities are assigned a number when created, so
% there is in both a \describecolumn*[ru1es]{num6er}
% \describecolumn+[am6iguities]{num6er} column that
% can be designated as the \texttt{PRIMARY KEY} for rows in the
% table.
%
% \begin{arrayvar}{DB_column}
% The |DB_column| array is indexed by the name of a table and keeps
% tracks of the columns in this table. Each element is a list with
% the structure
% \begin{displaysyntax}
% \begin{regblock}[\regstar] \word{name} \word{type}
% \end{regblock}
% \end{displaysyntax}
% where the \word{name} is the column name and \word{type} is its
% SQL type.
% \begin{tcl}
# Both main tables get an automatically assigned row number, serving
# as primary key.
lappend DB_column(ru1es) num6er {INTEGER PRIMARY KEY AUTOINCREMENT}
lappend DB_column(am6iguities) num6er {INTEGER PRIMARY KEY AUTOINCREMENT}
% \end{tcl}
% One could probably do without the |AUTOINCREMENT| here, as rows
% are never removed from these tables, but I don't think it hurts
% either.
%
% Note: A mechanism for adjusting column types to the underlying
% database would be good. The |column_defs| proc has the beginnings
% of such a mechanism.
% \end{arrayvar}
%
% \describecolumn[ru1es]{lhs}
% \describecolumn[am6iguities]{si7e}
% \describecolumn[ru1es]{feed6acks}
% \describecolumn*[am6iguities]{feed6acks}
% Conceptually however, the identifying feature of a rule or ambiguity
% is a network with feedback. Since all networks involved in a rule
% or ambiguity has the same set of feedbacks, this information is put
% in the separate column |feed6acks|. The network itself is the left
% hand side in the case of a rule, and the site of the ambiguity in
% the other case, so these columns are named |lhs| and |si7e|.
% \begin{tcl}
# Identifying network of a row: the left hand side (lhs) of a rule,
# or the site (si7e) of an ambiguity; the shared feedback set is kept
# in the separate feed6acks column.
lappend DB_column(ru1es) lhs {NOT NULL} feed6acks {NOT NULL} \
rhs {NOT NULL}
lappend DB_column(am6iguities) si7e {NOT NULL} feed6acks {NOT NULL}
% \end{tcl}
%
% \describecolumn[ru1es]{rhs}
% The other part of a rule is the right hand side, which is a linear
% combination of networks. Hence it is a list\slash dictionary
% \begin{displaysyntax}
% \begin{regblock}[\regstar] \word{network} \word{coefficient}
% \end{regblock}
% \end{displaysyntax}
% where the \word{network}s should be on canonical form and the
% \word{coefficient}s belong to whatever ring of scalars one operates
% over.
%
% Another important column in |ru1es| is the
% \describecolumn+[ru1es]{st4te} column, which expresses the state of
% said row. (|am6iguities| has a similar |st4te| column, documented
% separately.) This is basically a column where the values are
% individual characters, interpreted as follows:
% \begin{ttdescription}
% \item[R]
% An ordinary active rule. Only rows in this state are used in
% reduction.
% \item[r]
% An ordinary inactive rule. Rules get demoted from |R| to |r|
% when a new rule is discovered which acts on the same networks as
% this one does.
% \item[E]
% An active equality pseudo-rule. These are created when an
% equality without unique leading term are discovered, and
% participate in ambiguity creation but are not used for
% reduction (since that would generate cycles). The rewrite
% system cannot be complete as long as there exists this kind of
% entries.
% \item[e]
% An inactive equality pseudo-rule (does not form ambiguities,
% and is tolerated in a complete rewrite system).
% \end{ttdescription}
% It may also be of interest to introduce a class |I| of rules where
% the leading coefficient is not invertible, but that can be a later
% development.
% \begin{tcl}
# st4te: one-character row state (R/r/E/e, see above); pr00f: number
# of the ambiguity the rule was derived from, or null for an axiom.
lappend DB_column(ru1es) st4te {CHAR NOT NULL} pr00f INTEGER
% \end{tcl}
% The \describecolumn+[ru1es]{pr00f} column in the |ru1es| table
% contains the number of the ambiguity from which the rule or
% equality was derived, or null if it was given as an axiom.
%
% The \describecolumn+[ru1es]{wh3n1} column is the timestamp
% (|clock seconds| value) for when the rule or equality was created
% and \describecolumn+[ru1es]{wh3n2} is (if non-null) the timestamp
% for when it was retired.
% \begin{tcl}
# Creation (wh3n1) and, if retired, retirement (wh3n2) timestamps,
# as [clock seconds] values.
lappend DB_column(ru1es) wh3n1 INTEGER wh3n2 INTEGER
% \end{tcl}
% An SQL date value might have been more appropriate, but I find
% this sufficient. Some drivers might however require the column to
% be a bigint.
%
%
%
% \subsection{The legend}
%
% Besides the two main tables, there is a table
% \describestring+[table]{le9end} which basically is a dictionary of
% auxiliary information providing context to the |ru1es| and
% |am6iguities|. This table only has two columns:
% \describecolumn+[le9end]{k3y} and \describecolumn+[le9end]{va1ue},
% both of which are arbitrary strings. The |k3y| must be unique
% within the table.
% \begin{tcl}
# The le9end table is a two-column key--value store of auxiliary
# information; keys must be unique.
lappend DB_column(le9end) k3y {UNIQUE} va1ue {}
% \end{tcl}
%
% The \describestring+[legend key]{code_variants} entry in the legend
% stores the value of the |code_variants| variable for the version of
% the utility that created the database. If the two are different
% then there may be problems when trying to open it.
%
% The \describestring+[legend key]{signature} entry in the legend is
% the signature of the free \PROP\ in which the calculations are
% carried out, in the form of a dictionary mapping each annotation
% $x$ to the pair `$\alpha(x) \ \omega(x)$' of arity and coarity. In
% other words, it has the structure of the \word{type-dict} argument
% of the |network::pure::construct| procedure. \textbf{The order of
% the keys in this dictionary is significant} since the interpretation
% of the profile columns depend on it. \textbf{The existence of this
% entry in a database} signals that the signature may no longer be
% modified; there may be data stored elsewhere that would be
% extremely difficult to adjust according to changes in the signature.
%
% The \describestring+[legend key]{coeff_setup} entry is a script
% that sets up anything needed for the |coeff_cmd| (requires
% packages, creates aliases, etc.).
% The \describestring+[legend key]{coeff_cmd} entry in the legend is
% the command prefix of the ring of coefficients.
% \begin{quote}
% \textbf{Discussion.} \itshape
% It might be better to store a make-command for the coefficient
% ring prefix than to store the actual prefix, on the grounds that
% this is more likely to survive updates in the package. On the
% other hand, neither approach would survive an update in the
% package that changes the data representation, so persistence here
% is an interesting problem on the level of principles.
% \end{quote}
%
% Other \texttt{le9end} entries are described where the corresponding
% runtime variables are introduced.
%
%
% \subsection{Network profiles}
%
% When reducing a network, the most time-consuming task is that of
% finding a rule whose |lhs| occurs in it or deciding that no such
% rule exists, since |network::wfb::instances| is rather complicated.
% Profiles provide a shortcut, by counting the number of occurrences
% of certain features within a network, and thus a way of eliminating
% rules from the search before even looking at the |lhs|, since there
% can be no instances of $H$ in $G$ if there are more occurrences of
% some feature in $H$ than there is in $G$.
%
% The features being counted are simply the vertices (separate count
% for each annotation) and the edges, where the endpoints are used to
% refine the count, paying attention to both vertex annotation and
% the index of the edge. For a signature $(\Omega,\alpha,\omega)$
% there are thus $\sum_{x \in \Omega} \omega(x)$ different things that
% can be at the tail end of an edge, and
% $\sum_{x \in \Omega} \alpha(x)$ different things that can be at the
% head end of it, so the number of different edge features is the
% product of these. There is no point in counting boundary edges
% since their numbers are already determined by the counts of
% internal edges and vertices; also these counts are more troublesome
% to compare, as a boundary edge in $H$ can be internal in $G$.
% Counting edges this way is only slightly weaker than counting
% connected $2$-vertex subsemigraphs; it does not notice when such a
% subsemigraph is more than simply connected, but that will on the
% other hand probably be very rare and would not warrant the
% increased complexity.
%
% \begin{proc}{network_profile}
% This procedure computes the profile (a dictionary mapping column
% names to counts). The call syntax is
% \begin{displaysyntax}
% |network_profile| \word{signature} \word{network}\regopt
% \end{displaysyntax}
% When called without a \word{network} argument, the command
% returns an ``all zeroes'' profile (technically it computes the
% profile of the empty network) that can be used for example to get
% the list of profile columns for this \word{signature}.
%
% The column names are of the form
% \describecolumn*[am6iguities]{v\meta{num}}
% \describecolumn+[ru1es]{v\meta{num}} for vertex counts and
% \describecolumn*[am6iguities]{e\meta{num}}
% \describecolumn+[ru1es]{e\meta{num}} for edge counts. The names
% can be opaque because they are not meant to be interpreted; only
% this procedure converts data from the network realm to the
% profile realm, so it only needs to do so consistently.
%
% The implementation relies heavily on |incr|'s new auto-initialise
% behaviour. The |VC| array is for vertex counts and is indexed by
% vertex annotations. The |EC| array is for edge counts and is
% indexed by lists on the form
% \begin{displaysyntax}
% \word{head-annotation} \word{head-index} \word{tail-annotation}
% \word{tail-index}
% \end{displaysyntax}
% \begin{tcl}
proc network_profile {signature {NW {{{"" {} {}} {"" {} {}}} {}}}} {
    # Compute the profile of network $NW: a dictionary mapping profile
    # column names (v<num> for vertex counts, e<num> for edge counts)
    # to feature counts.  Without a network argument the profile of the
    # empty network -- all zeroes -- is returned, which is useful for
    # obtaining the list of profile columns for this signature.
    #
    # Tally vertices by their annotation; relies on incr's
    # auto-initialisation of unset array elements.
    foreach vtx [lindex $NW 0] {
        incr vcount([lindex $vtx 0])
    }
    # Tally internal edges.  Each edge's vertex indices (elements 0
    # and 2) are replaced by the annotations of those vertices, so the
    # array key records annotation and edge index at both endpoints.
    foreach edge [lindex $NW 1] {
        lset edge 0 [lindex $NW 0 [lindex $edge 0] 0]
        lset edge 2 [lindex $NW 0 [lindex $edge 2] 0]
        incr ecount($edge)
    }
    # Walk the signature in order, emitting one v<num> column per
    # annotation while enumerating every possible edge endpoint:
    # "ins" from the first component of each arity/coarity pair,
    # "outs" from the second.
    set ins {}
    set outs {}
    set profile {}
    set col 0
    foreach {ann pair} $signature {
        lappend profile v$col [incr vcount($ann) 0]
        for {set k 0} {$k < [lindex $pair 0]} {incr k} {
            lappend ins [list $ann $k]
        }
        for {set k 0} {$k < [lindex $pair 1]} {incr k} {
            lappend outs [list $ann $k]
        }
        incr col
    }
    # One e<num> column per (out-endpoint, in-endpoint) combination,
    # in the same nesting order as the enumeration above.
    set col 0
    foreach src $outs {
        foreach dst $ins {
            lappend profile e$col [incr ecount([concat $src $dst]) 0]
            incr col
        }
    }
    return $profile
}
% \end{tcl}
% \end{proc}
%
%
%
% \subsection{Ambiguities}
%
% Most of the information in an ambiguity row is in the
% \describecolumn+[am6iguities]{re5olution} column, which holds the
% list of steps that were taken while trying to resolve the
% ambiguity. Each of these steps is a linear combination of networks,
% and therefore a dictionary mapping monomials to coefficients. The
% first step for an ambiguity $(t_1,\mu,t_2)$ is always the
% difference $t_1(\mu) - t_2(\mu)$ (the ``S-polynomial'').
% Since each network typically appears in more than one step of the
% |re5olution|, the \emph{monomials} there are not the networks
% themselves, but indices into a list of networks that have occurred
% during the resolution of this ambiguity. That list of networks kept
% in the \describecolumn+[am6iguities]{m0n0mials} column.
% \begin{tcl}
# re5olution: the list of resolution steps.  The networks occurring
# in them are stored once, in m0n0mials -- or, under the zlib module,
# zlib-deflated in deflated_m0n0mials.
lappend DB_column(am6iguities) re5olution {NOT NULL}
%<-zlib>lappend DB_column(am6iguities) m0n0mials {NOT NULL}
%<+zlib>lappend DB_column(am6iguities) deflated_m0n0mials BLOB
% \end{tcl}
% Experience has shown that the size of a |m0n0mials| list for a
% resolved ambiguity is typically on the order of a few kilobytes, and
% accounts for a substantial percentage of the overall database size,
% so it makes sense to compress them. In the \Module{zlib} setting,
% there is not a |m0n0mials| column, but instead a
% \describecolumn+[am6iguities]{deflated_m0n0mials} column, whose
% contents are the result of doing
% \begin{displaysyntax}
% zlib deflate [encoding convertto utf-8 \word{m0n0mials}] 9
% \end{displaysyntax}
% where the |encoding convertto| of course doesn't change anything
% for ASCII data but provides correctness also for non-ASCII data.
%
% Besides the individual steps in the resolution, it is also
% interesting to keep track of which rules were applied, to which
% monomial they were applied, and exactly where in these they were
% applied. The \describecolumn+[am6iguities]{wh1ch} column is a list of
% rule |num6er|s such that the $i$th rule listed here was used to
% transform the $i$th step into the $(i +\nobreak 1)$th step. The
% \describecolumn+[am6iguities]{wh3re} column is a list of pairs
% \begin{displaysyntax}
% \word{monomial} \word{region}
% \end{displaysyntax}
% that specify to which monomial the corresponding rule from |wh1ch|
% was applied, and what region in it was replaced when doing so. The
% reason the rule numbers are kept in a separate column is that this
% makes it feasible to e.g.~search for resolutions that employed a
% particular rule.
% \begin{tcl}
# wh1ch: rule numbers applied at each resolution step; wh3re: the
# monomial and region each such rule application rewrote.  Keeping
# the rule numbers separate makes them searchable.
lappend DB_column(am6iguities) wh1ch {} wh3re {}
% \end{tcl}
%
% It should be observed that |wh1ch| and |wh3re| do not provide
% information about how the first resolution step $t_1(\mu)-t_2(\mu)$
% is formed. The rules that take part in this are kept in the
% \describecolumn+[am6iguities]{p1us} and
% \describecolumn+[am6iguities]{m1nus} columns, where |p1us| is used
% in $t_1$ and |m1nus| is used in $t_2$. The corresponding regions
% of the |si7e| are kept in the
% \describecolumn+[am6iguities]{p1us2eg} and
% \describecolumn+[am6iguities]{m1nus2eg} columns.
% \begin{tcl}
# First-step provenance: p1us/m1nus are rule num6ers (nullable INTEGERs),
# p1us2eg/m1nus2eg the corresponding regions of the si7e (never null).
lappend DB_column(am6iguities) p1us INTEGER m1nus INTEGER
lappend DB_column(am6iguities) p1us2eg {NOT NULL} m1nus2eg {NOT NULL}
% \end{tcl}
% It should be observed that \emph{the \texttt{p1us} and \texttt{m1nus}
% columns may be null}, for an ambiguity that is used to feed an
% unspecified congruence into the completion procedure. In such
% ambiguities, the \describecolumn[am6iguities]{si7e} network is
% arbitrary (usually the first element of |m0n0mials|) and the
% profile columns are bogus (usually set to all zeroes). Furthermore
% the |p1us2eg| and |m1nus2eg| columns contain text descriptions of
% the identity being encoded. The format is unspecified, but the
% |p1us2eg| column can contain a longer description and |m1nus2eg|
% column a shorter description.
%
% Then there is the \describecolumn+[am6iguities]{st4te} column,
% which expresses the status of an ambiguity.
% \begin{ttdescription}
% \item[A]
% An active ambiguity, i.e., one that still requires resolution.
% \item[0]
% An ambiguity that has been resolved to $0$. For ambiguities in
% this state, the final element in the |re5olution| is empty
% (representing $0$).
% \item[r]
% An ambiguity that was made resolvable by the creation of a new
% rule; exactly which rule that was can be determined by
% looking at the |pr00f| column in the |ru1es| table.
% \item[e]
% An ambiguity that could not be resolved, and instead led to
% the creation of two or more equalities. Exactly which
% equalities can be determined by looking at the |pr00f| column
% in the |ru1es| table.
% \item[d]
% An ambiguity that could not be resolved, and would have led to
% the creation of two or more equalities, were it not that these
% (or some simpler form of them) are already known. The
% \describecolumn/+[am6iguities]{wh1ch} list has one element,
% namely the |num6er| of the equality that implied the final step
% in the |re5olution|.
% \item[S]
% A suspended ambiguity, i.e., one that still requires resolution
% but is expected to resolve when an additional rule has been
% added. Information about where that rule might come from is not
% kept in the database.
% \item[2]
% The ambiguity has not been explicitly resolved, but it has been
% discarded as useless by ``Buchberger's second criterion''. The
% \describecolumn/+[am6iguities]{wh1ch} list has one element,
% namely the |num6er| of the rule or equality that has an
% instance in the site of the ambiguity.
% \end{ttdescription}
% \begin{tcl}
lappend DB_column(am6iguities) st4te {CHAR NOT NULL}
% \end{tcl}
% It would not be possible to use the |st4te| column to distinguish
% ambiguities used to feed congruences into the system, as these
% ambiguities get resolved just like ordinary ones and thus change
% state.
%
% As for rules, the \describecolumn+[am6iguities]{wh3n1} column is
% the timestamp for when the ambiguity was created and
% \describecolumn+[am6iguities]{wh3n2} is (if non-null) the timestamp
% for when the ambiguity was retired.
% \begin{tcl}
lappend DB_column(am6iguities) wh3n1 INTEGER wh3n2 INTEGER
% \end{tcl}
%
% The \describecolumn+[am6iguities]{vRe5Sum} column contains the
% total number of (internal) vertices in all terms of the first step
% of the |re5olution|. This number is one candidate to order by when
% selecting the next ambiguity to process. The
% \describecolumn+[am6iguities]{v2Re5Sum} column similarly contains
% the sum over all terms of the squares of the numbers of internal
% vertices.
% \begin{tcl}
lappend DB_column(am6iguities) vRe5Sum INTEGER v2Re5Sum INTEGER
% \end{tcl}
%
%
% \subsection{Pairs}
%
% Experience with the first example problem (Hopf with twist) has shown
% that the database is around $1.5\,\mathrm{GB}$ when the |am6iguities|
% table reaches half a million rows. Most of these ambiguities are
% expected to be irrelevant\Ldash generated from a rule or equality
% that is subsequently dropped, and too large to figure in the proofs
% of anything interesting\Dash so it would be interesting to
% establish a less spacious storage for ambiguities far from those
% that are currently being chosen for resolution.
%
% The natural format for this would be as a table of critical pairs
% that have not yet contributed to the |am6iguities| table. This
% table is called \describestring+[table]{pa1rs}, and its two main
% columns are \describecolumn+[pa1rs]{p1us} and
% \describecolumn+[pa1rs]{m1nus} with the same interpretations as in
% |am6iguities|.
% \begin{tcl}
# Critical pairs not yet expanded into am6iguities: the two rule num6ers
# plus the pick8y rank used when selecting the next pair to process.
lappend DB_column(pa1rs) p1us INTEGER
lappend DB_column(pa1rs) m1nus INTEGER
lappend DB_column(pa1rs) pick8y NUMERIC
% \end{tcl}
% There is also a column \describecolumn+[pa1rs]{pick8y} which holds
% whatever rank is used when picking ambiguities.
%
% \medskip
%
% More experience still has shown that even if the space requirements
% are kept in moderation when using pairs, it still takes an awful
% lot of time to enumerate all the overlaps, especially since most
% of the information is simply going to be thrown away. It would be
% better to delay this step until some ambiguity is actually going to
% be used; on one hand this means remembering also pairs for which
% there is no overlap, but on the other hand it means not keeping
% multiples (one pair for each ambiguity) of pairs, and on the whole
% it all seems to even out. The problem is then how to compute a
% reasonable |pick8y| value for a pair.
%
% At least in the \Module{vsquare} case, where the |pick8y| value is
% the sum over all terms of the squares of the orders of the
% networks, this is actually easy enough to be done in SQL. The basic
% idea is that when annexing to network $G$ of order $g$ a list
% $H_1,\dotsc,H_n$ of networks of orders $h_1,\dotsc,h_n$, the
% order-square sum of the result list is
% \begin{equation*}
% \sum_{i=1}^n (g + h_i)^2 =
% g^2 + 2g\sum_{i=1}^n h_i + \sum_{i=1}^n h_i^2
% \text{;}
% \end{equation*}
% hence in order to have the left hand side readily available for
% every value of $g$, it is sufficient to precompute and store the
% two sums in the right hand side.
%
% A trickier issue is that of determining the appropriate value of
% $g$, since this is going to be the number of vertices (in the |lhs|
% of the other rule of the pair) that don't overlap. An accurate
% value is not feasible, but it should be sufficient to merely give
% a good guess, and as that one can take the average order of the
% overlap region over all overlaps with this rule computed so far!
% Hence a prototypical
% command for adding to the |pa1rs| table all pairs that can be
% formed from a (new) rule would be:
%\begin{verbatim}
% INSERT INTO pa1rs (p1us,m1nus,pick8y)
% SELECT num6er, :newnum,
% annex1*annex1 + 2*annex1*vSum + v2Sum +
% annex2*annex2 + 2*annex2*:vSum + :v2Sum
% FROM ( SELECT num6er, vSum, v2Sum,
% :lhsOrder - overl4p AS annex1,
% lhsOrder - overl4p AS annex2
% FROM ( SELECT num6er, vSum, v2Sum,
% v0+v1+v2+v3+v4+v5 AS lhsOrder,
% coalesce( overl4pSum / overl4pTimes, 1 ) AS overl4p
% FROM ru1es WHERE st4te IN ('R','E') ))
%\end{verbatim}
%
% This requires some four new columns in the |ru1es| table, but no
% more than that. The \describecolumn+[ru1es]{vSum} and
% \describecolumn+[ru1es]{v2Sum} are thus the sum of the orders and
% sum of the squares of the orders respectively, of the networks in
% the |rhs| of the rule. The \describecolumn+[ru1es]{overl4pTimes} are
% the number of ambiguities constructed for a rule, and
% \describecolumn+[ru1es]{overl4pSum} is the sum of the orders of all
% overlap regions in these ambiguities.
% \begin{tcl}
%<*lazier>
# Per-rule statistics for lazy pair ranking: vSum/v2Sum are the sum of
# rhs network orders and of their squares; overl4pSum/overl4pTimes
# accumulate overlap-region orders and counts so that the average
# overlap (overl4pSum/overl4pTimes) can estimate future annex sizes.
# overl4pSum is REAL to force real-valued division in the SQL above.
lappend DB_column(ru1es) vSum REAL v2Sum REAL \
    overl4pSum {REAL DEFAULT 0} overl4pTimes {INTEGER DEFAULT 0}
%</lazier>
% \end{tcl}
% The main reason to make |overl4pSum| a real is that I didn't at the
% time find any other way to avoid integer division above (the proper
% SQL construct is |CAST (overl4pSum AS REAL)|). The real
% declarations could just as well have been integers.
%
% The problem with this set-up is however that a sparse encoding
% (|pa1rs| as a set of pairs of integers) is used for what is really
% a very dense set. When |ru1es| approaches $8000$ elements, |pa1rs|
% is beyond $30$ million elements, and easily using up some gigabyte
% of disk area. A more compact representation is called for, but here
% the problem arises of how much can be accomplished on the SQL side.
% It seems a composite value can reasonably well be taken apart (by
% indexing a string) but not so easily put together again, so
% assistance from \Tcl\ is necessary. The problem on the \Tcl\ side
% would be with retrieving the overlap statistics from the database,
% but this is a minor chunk of data and could therefore be fetched
% even when just adding pairs for a new rule. Hence the best solution
% seems to be to actually perform the above calculations on the \Tcl\
% side.
%
% The dense representation of the pair set is therefore kept in a table
% \describestring+[table]{pa1rMap} with two columns
% \describecolumn+[pa1rMap]{m1nus} and
% \describecolumn+[pa1rMap]{plu5es}. The first column contains
% integers referencing the |num6er| in |ru1es|, while the second
% column contains BLOBs whose binary patterns encode which |p1us|
% values would form a pair with the |m1nus|. Each bit corresponds to
% one pair, where $1$ means ``pair is in |pa1rMap|'' and $0$ means it
% isn't (it might be in |pa1rs|, or it might have been processed
% already). The BLOB is to be |binary scan|ned and |format|ted as
% |B*|, where the first digit corresponds to |ru1e| $1$.
% \begin{tcl}
%<*lazier>
# Dense pair bitmap: one row per m1nus rule; plu5es is a BLOB whose bit
# for ru1e i (first bit of the B* scan = ru1e 1) is 1 exactly when the
# pair (i, m1nus) is still recorded in pa1rMap.
lappend DB_column(pa1rMap) m1nus {INTEGER PRIMARY KEY} plu5es BLOB
%</lazier>
% \end{tcl}
%
%
%
% \subsection{Initialising the database}
%
%
% \begin{proc}{prepare_statements}
% This procedure creates TDBC statement objects for database
% transactions that are carried out frequently, and stores them
% into the |DBCMD| array. The call syntax is
% \begin{displaysyntax}
% |prepare_statements| \word{db}
% \end{displaysyntax}
% and there is no particular return value.
%
% The first order of business is to retrieve the |signature|, since
% that is needed to construct proper statements involving profiles.
% \begin{tcl}
proc prepare_statements {db} {
variable DBCMD
set signature [dict get [lindex [
$db allrows {SELECT va1ue FROM le9end WHERE k3y='signature'}
] 0] va1ue]
set pcols [dict keys [network_profile $signature]]
% \end{tcl}
%
% \begin{arrayentry}{DBCMD}{Divisors}
% The |Divisors| statement finds proper active rules whose |lhs|
% might have an instance in a network with the provided profile.
% \begin{tcl}
set L [list {(st4te = 'R')}]
foreach col $pcols {lappend L "($col <= :$col)"}
set DBCMD(Divisors) [$db prepare "
SELECT num6er,lhs,rhs,feed6acks
FROM ru1es WHERE ( [join $L " AND "] )
"]
% \end{tcl}
% This is used when reducing expressions, to find rules that
% might act nontrivially.
% \end{arrayentry}
%
% \begin{arrayentry}{DBCMD}{EDivisors}
% The |EDivisors| statement finds equalities whose |lhs| might
% have an instance in a network with the provided profile.
% \begin{tcl}
set L [list {(st4te = 'E' OR st4te = 'e')}]
foreach col $pcols {lappend L "($col <= :$col)"}
set DBCMD(EDivisors) [$db prepare "
SELECT num6er,lhs,rhs,feed6acks
FROM ru1es WHERE ( [join $L " AND "] )
"]
% \end{tcl}
% This is used when checking whether the end result of resolving
% an ambiguity is just a duplicate of some known equality. It is
% necessary to try also the inactive equalities, because
% equalities can deactivate each other.
% \end{arrayentry}
%
% \begin{arrayentry}{DBCMD}{RDividedBy}
% The |RDividedBy| statement finds active rules or equalities
% whose |lhs| might contain an instance of a network with the
% provided profile.
% \begin{tcl}
set L [list {(st4te = 'R' OR st4te = 'E')}]
foreach col $pcols {lappend L "($col >= :$col)"}
set DBCMD(RDividedBy) [$db prepare "
SELECT num6er,st4te,lhs,rhs,feed6acks
%<lazier> , vSum, v2Sum
FROM ru1es WHERE ( [join $L " AND "] )
"]
% \end{tcl}
% This is used to find rules that should be dropped when a new
% rule has been added. It is also used to discover the |num6er|
% assigned to a newly created rule.
% \end{arrayentry}
%
% \begin{arrayentry}{DBCMD}{RDividedBy2}
% The |RDividedBy2| statement finds active rules or equalities
% whose |lhs| might contain an instance of a network with the
% provided profile and whose |num6er|s are greater than one that
% is specified.
% \begin{tcl}
set L [list {(st4te = 'R' OR st4te = 'E')}]
lappend L {(num6er > :num6er)}
foreach col $pcols {lappend L "($col >= :$col)"}
set DBCMD(RDividedBy2) [$db prepare "
SELECT num6er,st4te,lhs,rhs,feed6acks
FROM ru1es WHERE ( [join $L " AND "] )
"]
% \end{tcl}
% This is used to find rules that might imply an ambiguity with
% the given profile can be discarded by ``Buchberger's second
% criterion''.
% \end{arrayentry}
%
% \begin{arrayentry}{DBCMD}{ADividedBy}
% The |ADividedBy| statement finds the |num6er|s of active or
% suspended ambiguities whose |si7e| might contain an instance
% of a network with the provided profile.
% \begin{tcl}
%<*aprofile>
set L [list {(st4te = 'A' OR st4te = 'S')} {(p1us NOTNULL)}]
foreach col $pcols {lappend L "($col >= :$col)"}
set DBCMD(ADividedBy) [$db prepare "
SELECT num6er FROM am6iguities WHERE ( [join $L " AND "] )
"]
%</aprofile>
% \end{tcl}
% This is used to find ambiguities that can be discarded by
% ``Buchberger's second criterion'', hence the restriction that
% |p1us| must be non-null.
%
% This statement used to return full rows of the |am6iguities|