forked from aisejohan/stacks-history
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_history.py
1424 lines (1231 loc) · 40.9 KB
/
create_history.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Get detailed information out of the Stacks project history
import subprocess
import re
import Levenshtein
import copy
import pickle
import os
from definitions_history import *
from functions_history import *
from print_functions_history import *
# Get tex file names out of list of files
def get_names(temp):
    """Return the base names of the top-level tex files listed in temp.

    temp is a list of path strings (one per file).  Paths containing a '/'
    (files in subdirectories) and paths that do not end in '.tex' are
    skipped; the remaining names are returned with the '.tex' ending
    removed, in their original order.
    """
    names = []
    for file_name in temp:
        # Get rid of files in subdirectories
        if '/' in file_name:
            continue
        # Get rid of non-tex files.  Test the suffix instead of mere
        # containment, so that a name such as 'foo.tex.orig' is not taken
        # for a tex file and then mangled by the slice below.
        if not file_name.endswith('.tex'):
            continue
        # Get rid of the .tex ending
        names.append(file_name[:-4])
    return names
# List files in given commit
def get_names_commit(commit):
    """Return the names produced by get_names for the files listed at commit.

    The listing comes from "git ls-tree --name-only" run in websiteProject.
    """
    listing = subprocess.check_output(
        ["git", "-C", websiteProject, "ls-tree", "--name-only", commit]
    ).decode("latin-1", "backslashreplace")
    return get_names(listing.splitlines())
# Does a file exist at a given commit in ../stacks-project
def exists_file(filename, commit):
    """Return True if git ls-tree prints anything for filename at commit."""
    output = subprocess.check_output(
        ["git", "-C", websiteProject, "ls-tree", '--name-only', commit, '--', filename]
    )
    # Non-empty output is taken to mean that the path is present
    return bool(output)
# Get a file at given commit in ../stacks-project
# Assumes the file exists
def get_file(filename, commit):
    """Return the contents of filename at commit as a string.

    The blob is read with "git cat-file -p commit:filename" and decoded
    as latin-1.  The caller is expected to know that the file exists.
    """
    blob = subprocess.check_output(
        ["git", "-C", websiteProject, "cat-file", '-p', commit + ':' + filename]
    )
    return blob.decode("latin-1", "backslashreplace")
# Finds all environments in ../stacks-project/name.tex at given commit
# and returns them as a single list containing both environments with
# proofs and environments without proofs (empty if the file does not exist)
def get_envs(name, commit):
    """Parse name.tex at the given commit and collect its environments.

    Returns one flat list holding both env_with_proof and env_without_proof
    objects (an empty list if the file does not exist at that commit).
    For each environment the begin/end line numbers (b, e), the text and
    the first label found are recorded; for environments with proofs the
    proof text and its begin/end line numbers (bp, ep) are recorded too.

    Calls exit(1) if a new environment starts while the object being
    filled in already carries a label.
    """
    # We will store all envs in the following list
    envs = []
    # Check if the file exists, if not exit
    if not exists_file(name + '.tex', commit):
        return []
    # Initialize an empty environment with proof
    # NOTE(review): the positional arguments presumably are
    # (name, type, label, tag, b, e, text, bp, ep, proof) -- confirm
    # against definitions_history
    With = env_with_proof('', '', '', '', 0, 0, '', 0, 0, '')
    # Initialize an empty environment without proof
    Without = env_without_proof('', '', '', '', 0, 0, '')
    # Use splitlines(True) to keep line endings
    texfile = get_file(name + '.tex', commit).splitlines(True)
    # Parser state, at most one of these is set at a time for With:
    #   in_with     inside the statement of an environment with proof
    #   need_proof  statement finished, waiting for a begin{proof}
    #   in_proof    inside the proof of the current environment
    # and independently
    #   in_without  inside an environment without proof
    line_nr = 0
    in_with = 0
    need_proof = 0
    in_proof = 0
    in_without = 0
    for line in texfile:
        # Line numbers are 1-based
        line_nr = line_nr + 1
        if in_proof:
            With.proof += line
            if line.find('end{proof}') >= 0:
                # Proof complete: the environment is finished, store it
                With.ep = line_nr
                in_proof = 0
                envs.append(With)
                With = env_with_proof('', '', '', '', 0, 0, '', 0, 0, '')
        if in_with:
            With.text += line
            if line.find('end{' + With.type + '}') >= 0:
                # Statement complete; do not store yet, a proof may follow
                With.e = line_nr
                need_proof = 1
                in_with = 0
        if in_without:
            Without.text += line
            if line.find('end{' + Without.type + '}') >= 0:
                Without.e = line_nr
                in_without = 0
                envs.append(Without)
                Without = env_without_proof('', '', '', '', 0, 0, '')
        if line.find('begin{') >= 0:
            # Ignore a proof if we do not need one
            if need_proof and line.find('begin{proof}') >= 0:
                # The first line of the proof is stored here; later lines
                # are added by the in_proof branch above
                With.proof = line
                With.bp = line_nr
                in_proof = 1
                need_proof = 0
            for type in with_proofs:
                if line.find('begin{' + type + '}') >= 0:
                    # wipe out unfinished environment
                    if in_with:
                        With = env_with_proof('', '', '', '', 0, 0, '', 0, 0, '')
                        in_with = 0
                    # no proof present, but finished
                    elif need_proof:
                        envs.append(With)
                        With = env_with_proof('', '', '', '', 0, 0, '', 0, 0, '')
                        need_proof = 0
                    # unfinished proof for finished environment
                    elif in_proof:
                        With.bp = 0
                        With.ep = 0
                        With.proof = ''
                        envs.append(With)
                        With = env_with_proof('', '', '', '', 0, 0, '', 0, 0, '')
                        in_proof = 0
                    # wipe out unfinished environment
                    if in_without:
                        Without = env_without_proof('', '', '', '', 0, 0, '')
                        in_without = 0
                    With.name = name
                    With.type = type
                    # With was just reset in every branch above, so a label
                    # here means the bookkeeping went wrong
                    if not With.label == '':
                        print("Error: Label with already present")
                        exit(1)
                    With.b = line_nr
                    With.text = line
                    in_with = 1
            for type in without_proofs:
                if line.find('begin{' + type + '}') >= 0:
                    # wipe out unfinished environment
                    if in_with:
                        With = env_with_proof('', '', '', '', 0, 0, '', 0, 0, '')
                        in_with = 0
                    # no proof yet but a definition or such in between lemma and proof allowed
                    elif need_proof:
                        pass
                    # unfinished proof for finished environment
                    elif in_proof:
                        With.bp = 0
                        With.ep = 0
                        With.proof = ''
                        envs.append(With)
                        With = env_with_proof('', '', '', '', 0, 0, '', 0, 0, '')
                        in_proof = 0
                    # wipe out unfinished environment
                    if in_without:
                        Without = env_without_proof('', '', '', '', 0, 0, '')
                        in_without = 0
                    Without.name = name
                    Without.type = type
                    if not Without.label == '':
                        print("Error: Label without already present")
                        exit(1)
                    Without.text = line
                    Without.b = line_nr
                    in_without = 1
        # Only first label gets picked
        if (in_with and With.label == '') or (in_without and Without.label == ''):
            n = line.find('\\label{')
            if n >= 0:
                # n + 6 is the position of the opening brace of \label{
                n = n + 6
                # NOTE(review): if the closing brace is not on this line, m
                # is -1 and the slice below drops the last character of the
                # line instead of stopping at the brace
                m = line.find('}', n)
                label = line[n + 1 : m]
                if in_with:
                    With.label = label
                else:
                    Without.label = label
    # Clean up
    # wipe out unfinished environment
    if in_with:
        With = env_with_proof('', '', '', '', 0, 0, '', 0, 0, '')
        in_with = 0
    # no proof
    elif need_proof:
        envs.append(With)
        With = env_with_proof('', '', '', '', 0, 0, '', 0, 0, '')
        need_proof = 0
    # unfinished proof for finished environment
    elif in_proof:
        With.bp = 0
        With.ep = 0
        With.proof = ''
        envs.append(With)
        With = env_with_proof('', '', '', '', 0, 0, '', 0, 0, '')
        in_proof = 0
    # wipe out unfinished environment
    if in_without:
        Without = env_without_proof('', '', '', '', 0, 0, '')
        in_without = 0
    return envs
# Returns a dictionary labels ---> tags if tags/tags exists
def find_tags(commit):
    """Build the dictionary label ---> tag from tags/tags at commit.

    Returns an empty dictionary if tags/tags does not exist at commit.
    Lines starting with '#' are skipped; every other line is split on
    commas, its first field being the tag and its second the label.
    """
    if not exists_file('tags/tags', commit):
        return {}
    tags = {}
    for entry in get_file('tags/tags', commit).splitlines():
        if entry.startswith('#'):
            continue
        fields = entry.split(",")
        tags[fields[1]] = fields[0]
    return tags
# Find parents of a commit
def find_parents(commit):
    """Return the list of parents of commit (zero, one or two hashes).

    Runs "git rev-list -n1 --topo-order --parents commit" in websiteProject;
    the output is split on spaces and is expected to start with commit
    itself, followed by its parents.

    Calls exit(1) if the output does not start with commit or if more than
    two parents are listed.
    """
    output = subprocess.check_output(["git", "-C", websiteProject, "rev-list", "-n1", "--topo-order", "--parents", commit])
    output = output.decode("latin-1", "backslashreplace")
    commits = output.rstrip().split(' ')
    if not commits[0] == commit:
        print("Error: Unexpected format in find_parents.")
        exit(1)
    parents = commits[1:]
    if len(parents) > 2:
        print('Error: Unexpected number of parents!')
        exit(1)
    # A commit without parents used to fall off the end of the function
    # and yield None; return an empty list so callers always get a list
    return parents
# Finds all commits in ../stacks-project
def find_commits():
    """Return all commits reachable from master, first commit at index 0.

    git rev-list prints one hash per line; that list is reversed here.
    """
    listing = subprocess.check_output(
        ["git", "-C", websiteProject, "rev-list", "--topo-order", "master"]
    ).decode("latin-1", "backslashreplace")
    ordered = listing.splitlines()
    ordered.reverse()
    return ordered
# gets next commit
def next_commit(commit):
    """Return the commit following commit in the list from find_commits.

    Prints a warning and returns '' if commit is the last entry of that
    list or does not occur in it.
    """
    commits = find_commits()
    # Walk over consecutive pairs (current, following)
    for current, following in zip(commits, commits[1:]):
        if current == commit:
            return following
    print("Warning: There is no next commit!")
    return ''
# Get diff between two commits in a given file
# commit_before should be prior in history to commit_after
def get_diff_in(name, commit_before, commit_after):
    """Return the lines of the diff of name.tex between the two commits.

    The diff is produced by git with the options --patience and -U0.
    """
    revision_range = commit_before + '..' + commit_after
    raw = subprocess.check_output(
        ["git", "-C", websiteProject, "diff", "--patience", "-U0", revision_range, '--', name + '.tex']
    )
    return raw.decode("latin-1", "backslashreplace").splitlines()
# Regular expressions to parse diffs
# Hunk headers look like "@@ -a,b +c,d @@" where ",b" and ",d" may each be
# missing; one pattern per combination.  Raw strings are used so that no
# backslash is read as a (possibly invalid) string escape; only '+' needs
# escaping in the regular expression itself.
two_commas = re.compile(r'@@ -([0-9]*),([0-9]*) \+([0-9]*),([0-9]*) @@')
first_comma = re.compile(r'@@ -([0-9]*),([0-9]*) \+([0-9]*) @@')
second_comma = re.compile(r'@@ -([0-9]*) \+([0-9]*),([0-9]*) @@')
no_comma = re.compile(r'@@ -([0-9]*) \+([0-9]*) @@')
# Gets a list of line_nr changes between two commits
# in a given file
# commit_before should be prior in history to commit_after
def get_changes_in(name, commit_before, commit_after):
    """Return [lines_removed, lines_added] for name.tex between two commits.

    Both entries are lists of [start line nr, number of lines] pairs, one
    pair per hunk header of the diff.  A header looks like
        @@ -(old line nr),d +(new line nr),a @@
    meaning d lines were removed from the old file starting at old line nr
    and a lines were added starting at new line nr; ',d' is missing if
    d = 1 and ',a' is missing if a = 1.

    Calls exit(1) on a line starting with '@@' that fits none of the four
    header patterns.
    """
    lines_removed = []
    lines_added = []

    def record(old_start, old_count, new_start, new_count):
        # Store one hunk in both lists, converting the fields to integers
        lines_removed.append([int(old_start), int(old_count)])
        lines_added.append([int(new_start), int(new_count)])

    for line in get_diff_in(name, commit_before, commit_after):
        if not line.startswith('@@'):
            continue
        # Both counts present
        found = two_commas.findall(line)
        if len(found) == 1 and len(found[0]) == 4:
            record(found[0][0], found[0][1], found[0][2], found[0][3])
            continue
        # Only the removed count present, one line added
        found = first_comma.findall(line)
        if len(found) == 1 and len(found[0]) == 3:
            record(found[0][0], found[0][1], found[0][2], 1)
            continue
        # Only the added count present, one line removed
        found = second_comma.findall(line)
        if len(found) == 1 and len(found[0]) == 3:
            record(found[0][0], 1, found[0][1], found[0][2])
            continue
        # No counts present, one line removed and one added
        found = no_comma.findall(line)
        if len(found) == 1 and len(found[0]) == 2:
            record(found[0][0], 1, found[0][1], 1)
            continue
        print("Error: Unexpected format of following diff line: ")
        print(line)
        exit(1)
    return [lines_removed, lines_added]
# Gets a list of files changed between two commits
# commit_before should be prior in history to commit_after
def get_names_changed(commit_before, commit_after):
    """Return the names of the tex files changed between the two commits.

    Starts from "git diff --name-only" and then makes sure that every
    name listed at commit_before but not at commit_after is included as
    well, printing an info line for each such deleted file.
    """
    listing = subprocess.check_output(
        ["git", "-C", websiteProject, "diff", "--name-only", commit_before + '..' + commit_after]
    ).decode("latin-1", "backslashreplace")
    names_changed = get_names(listing.splitlines())
    # Look for deleted files
    names_before = get_names_commit(commit_before)
    names_after = get_names_commit(commit_after)
    for name in names_before:
        if name in names_after:
            continue
        print("Info: deleted file: " + name)
        # the diff should already list it, but make sure
        if name not in names_changed:
            names_changed.append(name)
    return names_changed
# Gets a dictionary mapping the name of each changed file to its
# line_nr changes between two commits
# commit_before should be prior in history to commit_after
def get_all_changes(commit_before, commit_after):
    """Return a dictionary name ---> result of get_changes_in for that name.

    The keys are the names reported by get_names_changed.
    """
    return {
        name: get_changes_in(name, commit_before, commit_after)
        for name in get_names_changed(commit_before, commit_after)
    }
# Regular expression matching removed and added tags
# A diff line "-TAG,label" or "+TAG,label" where TAG consists of four
# characters from 0-9 and A-Z.  Raw strings keep the backslash before '+'
# from being read as a string escape; '-' and ',' need no escaping.
deleted_tag = re.compile(r'^-([0-9A-Z]{4}),(.*)')
added_tag = re.compile(r'^\+([0-9A-Z]{4}),(.*)')
# Gets a list of tag changes between two commits
# commit_before should be prior in history to commit_after
def get_tag_changes(commit_before, commit_after):
    """Return [tags_removed, tags_added] for tags/tags between two commits.

    Both entries are lists of [tag, label] pairs taken from the diff lines
    matching deleted_tag and added_tag respectively.
    """
    raw = subprocess.check_output(
        ["git", "-C", websiteProject, "diff", "--patience", "-U0", commit_before + '..' + commit_after, '--', 'tags/tags']
    )
    tags_removed = []
    tags_added = []
    for line in raw.decode("latin-1", "backslashreplace").splitlines():
        gone = deleted_tag.findall(line)
        if gone:
            tags_removed.append([gone[0][0], gone[0][1]])
        new = added_tag.findall(line)
        if new:
            tags_added.append([new[0][0], new[0][1]])
    return [tags_removed, tags_added]
# Find tags whose labels got changed
def tags_changed_labels(tag_changes):
    """Find tags whose labels got changed.

    tag_changes is a pair [tags_removed, tags_added] of lists of
    [tag, label] pairs.  Returns a list of [tag, old label, new label]
    for every tag occurring in both lists.

    The two lists are walked in parallel, advancing whichever side has
    the smaller tag, so both are expected to be ordered by tag.
    """
    tags_removed = tag_changes[0]
    tags_added = tag_changes[1]
    tags_changed = []
    i = 0
    j = 0
    while i < len(tags_removed) and j < len(tags_added):
        old_tag = tags_removed[i][0]
        new_tag = tags_added[j][0]
        if old_tag == new_tag:
            # The new label belongs to the entry of tags_added that just
            # matched, i.e. the one at index j (not at index i)
            tags_changed.append([old_tag, tags_removed[i][1], tags_added[j][1]])
            i = i + 1
        elif old_tag < new_tag:
            i = i + 1
        else:
            j = j + 1
    return tags_changed
# Add tags to a list of environments
# Overwrites already existing tags
def add_tags(envs, tags):
    """Set env.tag for every env whose long label occurs in tags.

    tags is a list of (tag, label) pairs and the long label of an env is
    env.name + '-' + env.label.  An existing tag is overwritten; if several
    pairs carry the same label the last one wins.
    """
    for env in envs:
        wanted = env.name + '-' + env.label
        matching = [tag for tag, label in tags if label == wanted]
        if matching:
            env.tag = matching[-1]
# Get all envs from a commit
# Should only be used for the initial commit
def get_all_envs(commit):
    """Return a dictionary name ---> get_envs(name, commit).

    The names are those returned by get_names_commit for commit.
    """
    return {name: get_envs(name, commit) for name in get_names_commit(commit)}
# Initialize an env_history
def initial_env_history(commit, env):
    """Create an env_history whose only step is env at commit.

    The list of past envs receives a deep copy of env, the current state
    receives env itself.
    """
    snapshot = copy.deepcopy(env)
    return env_history(commit, env, [commit], [snapshot])
# Update an env_history with a given commit and env
# This replaces the current state as well!
def update_env_history(env_h, commit, env):
    """Append the step (commit, env) to env_h and make it the current state.

    A deep copy of env goes to the end of env_h.envs, while env_h.env is
    set to env itself.
    """
    snapshot = copy.deepcopy(env)
    # Record the new step at the end of both lists
    env_h.commits.append(commit)
    env_h.envs.append(snapshot)
    # Replace the current state
    env_h.commit, env_h.env = commit, env
# Initialize history
def initial_history():
    """Create the history object for the hard-coded initial commit.

    Every env found at that commit gets its own initial env_history.
    """
    initial_commit = '3d32323ff9f1166afb3ee0ecaa10093dc764a50d'
    all_envs = get_all_envs(initial_commit)
    # there are no tags present so we do not need to add them
    env_histories = [
        initial_env_history(initial_commit, env)
        for name in all_envs
        for env in all_envs[name]
    ]
    return history(initial_commit, env_histories, [])
# Logic for pairs: return
# -1 if start + nr - 1 < b
# 0 if intervals meet
# 1 if e < start
def logic_of_pairs(start, nr, b, e):
    """Locate the change (start, nr) relative to the lines b up to e.

    Returns -1 if the change lies before b, 1 if it lies after e and 0 if
    the two meet.
    """
    if nr == 0:
        # An empty change sits between the lines start and start + 1
        if start < b:
            return -1
        return 1 if e <= start else 0
    # now nr > 0 so the change covers the lines start up to last
    last = start + nr - 1
    if e < start:
        return 1
    return -1 if last < b else 0
# Compute shift
def compute_shift(lines_removed, lines_added, i):
    """Return the line number shift computed from chunk i.

    lines_removed[i] and lines_added[i] are [start, count] pairs.  A count
    of zero is treated as a change located just after its start line.
    Calls exit(1) if none of the three cases applies.
    """
    old_start = lines_removed[i][0]
    old_count = lines_removed[i][1]
    new_start = lines_added[i][0]
    new_count = lines_added[i][1]
    if old_count > 0 and new_count > 0:
        return new_start + new_count - old_start - old_count
    if old_count == 0:
        return new_start + new_count - old_start - 1
    if new_count == 0:
        return new_start + 1 - old_start - old_count
    print("Error: no change where there should be one!")
    exit(1)
# See if env from commit_before is changed
# If not changed, but moved inside file, then update line numbers
def env_before_is_changed(env, all_changes):
    """Tell whether env (from commit_before) is touched by a removed chunk.

    Returns True as soon as a removed chunk meets the statement of env or,
    for an environment with a non-empty proof, its proof.  Otherwise
    returns False, after shifting env.b, env.e (and env.bp, env.ep) by the
    shift of the last chunk lying before them.
    """
    if env.name not in all_changes:
        return False
    removed = all_changes[env.name][0]
    added = all_changes[env.name][1]

    def scan(idx, first, last):
        # Walk over the chunks from idx on; return (True, idx) if a chunk
        # meets the lines first..last and otherwise (False, idx) with idx
        # the index of the first chunk beyond them
        while idx < len(removed):
            where = logic_of_pairs(removed[idx][0], removed[idx][1], first, last)
            if where == 0:
                return True, idx
            if where == 1:
                break
            idx = idx + 1
        return False, idx

    touched, i = scan(0, env.b, env.e)
    if touched:
        return True
    # adjust line numbers; i is index of chunk just beyond env
    if i > 0:
        shift = compute_shift(removed, added, i - 1)
        env.b += shift
        env.e += shift
    if env.type in without_proofs or env.proof == '':
        return False
    # The proof could still be after the chunk we are at
    touched, i = scan(i, env.bp, env.ep)
    if touched:
        return True
    # adjust line numbers; i is the index of chunk just beyond proof of env
    if i > 0:
        shift = compute_shift(removed, added, i - 1)
        env.bp += shift
        env.ep += shift
    return False
# See if env from commit_after is new or changed
def env_after_is_changed(env, all_changes):
    """Tell whether env (from commit_after) is met by an added chunk.

    The statement lines are tested first; the proof lines are tested only
    for environments not in without_proofs whose proof is non-empty.
    """
    if env.name not in all_changes:
        return False
    lines_added = all_changes[env.name][1]
    if any(logic_of_pairs(start, nr, env.b, env.e) == 0 for start, nr in lines_added):
        return True
    if env.type in without_proofs or env.proof == '':
        return False
    return any(logic_of_pairs(start, nr, env.bp, env.ep) == 0 for start, nr in lines_added)
# Match text statement and proof if present
def text_match(env1, env2):
    """Tell whether the statement (and proof, if applicable) agree.

    The type of env1 decides: for a type in without_proofs only the texts
    are compared, for a type in with_proofs texts and proofs are compared.
    """
    same_text = env1.text == env2.text
    if env1.type in without_proofs and same_text:
        return True
    if env1.type in with_proofs and same_text and env1.proof == env2.proof:
        return True
    return False
# Simplest kind of match: name, label, type all match
def label_match(env_b, env_a):
    """Simplest kind of match: name, label and type all agree.

    The label has to be non-empty.  The names also count as agreeing for
    the known file renames listed below (old name before, new name after).
    """
    # Take care of files which were renamed
    # We can find these renames using git also...
    renames = (
        ('intersections', 'chow'),
        ('fpqc-descent', 'descent'),
        ('results', 'limits'),
        ('groupoid-schemes', 'groupoids'),
    )
    if not env_b.type == env_a.type:
        return False
    if not env_b.label == env_a.label or env_a.label == '':
        return False
    if env_b.name == env_a.name:
        return True
    return (env_b.name, env_a.name) in renames
# Closeness score
def closeness_score(env_b, env_a):
    """Return Levenshtein.ratio of the two texts plus small bonuses.

    Bonuses: 0.05 for equal names, 0.05 for equal types and 0.1 for equal
    non-empty labels.
    """
    bonuses = (
        (env_b.name == env_a.name, 0.05),
        (env_b.type == env_a.type, 0.05),
        (env_b.label == env_a.label and not env_b.label == '', 0.1),
    )
    score = 0
    # Add up in this fixed order so that the floating point result is stable
    for earned, weight in bonuses:
        if earned:
            score = score + weight
    return score + Levenshtein.ratio(env_b.text, env_a.text)
# Checking similarity of histories with the same label
def too_similar(History, name, label):
    """Look for two histories with this name and label that overlap.

    Returns True if for some pair of such histories, the later one in the
    list has the begin line or the end line of its env inside the line
    range b..e of the earlier one.
    """
    histories = History.env_histories
    for pos, first_h in enumerate(histories):
        first = first_h.env
        if not (first.name == name and first.label == label):
            continue
        for second_h in histories[pos + 1:]:
            second = second_h.env
            if not (second.name == name and second.label == label):
                continue
            if first.b <= second.b <= first.e:
                return True
            if first.b <= second.e <= first.e:
                return True
            # Can add test for overlap proofs too
    return False
# Check line numbers agree
def same_line_nrs(A, B):
    """Check that the line numbers of A and B agree.

    Compares b and e, and in addition bp and ep unless the type of A is
    in without_proofs.
    """
    statement_same = A.b == B.b and A.e == B.e
    if not statement_same:
        return False
    if A.type in without_proofs:
        return True
    proof_same = A.bp == B.bp and A.ep == B.ep
    if not proof_same:
        return False
    return True
# Types we do not look at for history
def wrong_type(label, names):
    """Tell whether label belongs to a type we do not look at for history.

    True if label starts with name + '-' + type for some name in names and
    some type among equation, section, subsection, subsubsection, item.
    """
    ignored = ['equation', 'section', 'subsection', 'subsubsection', 'item']
    return any(
        label.startswith(name + '-' + kind)
        for kind in ignored
        if kind in label
        for name in names
    )
# Return name if label is correct type
def name_in_correct_type(label, names):
    """Return the name for which label starts with name + '-' + type.

    The types in with_proofs are tried first, then those in
    without_proofs.  Returns '' if nothing fits.
    """
    for kinds in (with_proofs, without_proofs):
        for kind in kinds:
            if kind not in label:
                continue
            for name in names:
                if label.startswith(name + '-' + kind):
                    return name
    return ''
# Find doubles
def find_doubles(word, word_list, double):
    """Record word and tell whether it was seen before.

    A word already in word_list is appended to double and True is
    returned; otherwise it is appended to word_list and False is returned.
    """
    seen_before = word in word_list
    target = double if seen_before else word_list
    target.append(word)
    return seen_before
# Quick test
def env_in_history(env, History):
    """Quick test: is there a current env with the same name and begin line?"""
    return any(
        env.name == env_h.env.name and env.b == env_h.env.b
        for env_h in History.env_histories
    )
# Insert a score in list of scores
def insert_score(score, i, j, scores):
    """Insert [score, i, j] into scores, keeping the scores descending.

    The new entry goes in front of the first entry whose score is not
    larger than score, or at the end if there is no such entry.
    """
    position = len(scores)
    for idx, entry in enumerate(scores):
        if not score < entry[0]:
            position = idx
            break
    scores.insert(position, [score, i, j])
# User interface
def do_these_match(i, j, a, top, envs_h_b, envs_a, matches_a, matches_b, scores, commit_after):
    """Ask the user whether envs_h_b[i].env and envs_a[j] match.

    First prints the candidates scores[a] up to scores[top - 1] whose two
    sides are both still unmatched, then some statistics, then the two
    environments in question.  Keeps prompting until the answer is 'y'
    (returns True) or 'n' (returns False).
    """
    rule = '--------------------------------------------------------------------------------'
    print('\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    print("Matches left over:")
    left_a = 0
    for aa in range(a, top):
        candidate = scores[aa]
        if candidate[1] in matches_b or candidate[2] in matches_a:
            continue
        print("Score: " + str(candidate[0]))
        print_one_line(envs_h_b[candidate[1]].env)
        print_one_line(envs_a[candidate[2]])
        left_a += 1
    left_b = len(envs_h_b) - len(matches_b)
    print()
    print("Commit after: " + commit_after)
    print("There are " + str(left_b) + " left to match and " + str(left_a) + " choices left.")
    print()
    print("MATCH by score: " + str(scores[a][0]))
    print(rule)
    print_without(envs_h_b[i].env)
    print(rule)
    print_without(envs_a[j])
    print(rule)
    while True:
        choice = input('Do these match? (y/n): ')
        if choice in ('n', 'y'):
            return choice == 'y'
# Main function, going from history for some commit to the next
#
# Problem we ignore for now: history is not linear. Hence
# This will only work if commit_after is **not** a merge and has History.commit as parent
#
def update_history(History, commit_after, debug):
commit_before = History.commit
all_changes = get_all_changes(commit_before, commit_after)
# List of env_histories which are being changed in this commit
envs_h_b = []
for env_h in History.env_histories:
env = env_h.env
# The following line
# updates line numbers of env if not changed
# passes if env is changed
if env_before_is_changed(env, all_changes):
envs_h_b.append(env_h)
# Print data
if debug:
print("Debug: before envs:")
for env_h in envs_h_b:
print_one_line(env_h.env)
# List of new or changed envs in this commit
envs_a = []
all_envs = {}
for name in all_changes:
envs = get_envs(name, commit_after)
all_envs[name] = envs
for env in envs:
# The following line passes if env is changed
if env_after_is_changed(env, all_changes):
envs_a.append(env)
# Print data
if debug:
print("Debug: after envs")
for env in envs_a:
print_one_line(env)
# Get tag changes
tag_changes = get_tag_changes(commit_before, commit_after)
tag_del = tag_changes[0]
tag_new = tag_changes[1]
# Try to match environments between changes
# First time through
matches = []
matches_b = set()
matches_a = set()
i = 0
while i < len(envs_h_b):
env_b = envs_h_b[i].env
j = 0
while j < len(envs_a):
if j in matches_a:
j = j + 1
continue
env_a = envs_a[j]
# Catch the following types of matches:
# name + '-' + label
if label_match(env_b, env_a):
if debug:
print(str(i) + ':', end=' ')
print_one_line(env_b)
print(str(j) + ':', end=' ')
print_one_line(env_a)
matches.append([i, j])
matches_b.add(i)
matches_a.add(j)
break
j = j + 1
i = i + 1
# Second time through compute scores
scores = []
i = 0
while i < len(envs_h_b):
if i in matches_b:
i = i + 1
continue
env_b = envs_h_b[i].env
j = 0
while j < len(envs_a):
if j in matches_a:
j = j + 1
continue
env_a = envs_a[j]
score = closeness_score(env_b, env_a)
# if score > 1.05:
# print "MATCH by score: " + str(score)
# if debug:
# print str(i) + ':',
# print_one_line(env_b)
# print str(j) + ':',
# print_one_line(env_a)
# matches.append([i, j])
# matches_b.add(i)
# matches_a.add(j)
# break
# else:
insert_score(score, i, j, scores)
j = j + 1
i = i + 1
top = 0
while top < len(scores) and scores[top][0] > 0.69:
top = top + 1
a = 0
while a < top:
score = scores[a][0]
i = scores[a][1]
j = scores[a][2]
if not (i in matches_b or j in matches_a):
if True: #do_these_match(i, j, a, top, envs_h_b, envs_a, matches_a, matches_b, scores, commit_after):
print("MATCH by score: " + str(score))
if debug:
print(str(i) + ':', end=' ')
print_one_line(env_b)
print(str(j) + ':', end=' ')
print_one_line(env_a)
matches.append([i, j])
matches_b.add(i)
matches_a.add(j)
a = a + 1
# Add tags to new envs; this rarely does anything
add_tags(envs_a, tag_new)
for i, j in matches:
# carry over the tag if there is one before and not yet after
if envs_a[j].tag == '' and not envs_h_b[i].env.tag == '':
envs_a[j].tag = envs_h_b[i].env.tag
# Because the diff does not just record changes but also
# records pieces of text getting moved, it can happen that
# we think there is a change but all that happened was that
# the environment got moved within
# the file and nothing else changed. In this case we do not
# update the environment history but only adjust the line numbers
if envs_a[j].name == envs_h_b[i].env.name and \
envs_a[j].type == envs_h_b[i].env.type and \
envs_a[j].label == envs_h_b[i].env.label and \
envs_a[j].tag == envs_h_b[i].env.tag and \
text_match(envs_a[j], envs_h_b[i].env):
print("Moved environment:")
print_one_line(envs_a[j])
envs_h_b[i].env.b = envs_a[j].b
envs_h_b[i].env.e = envs_a[j].e
if envs_a[j].type in with_proofs:
envs_h_b[i].env.bp = envs_a[j].bp
envs_h_b[i].env.ep = envs_a[j].ep
else:
# update environment history
update_env_history(envs_h_b[i], commit_after, envs_a[j])
saved_histories = []
i = 0
while i < len(envs_h_b):
if i in matches_b:
i = i + 1
continue
print("Removing:", end=' ')
print_one_line(envs_h_b[i].env)
if debug:
print_env(envs_h_b[i].env)
j = History.env_histories.index(envs_h_b[i])
saved_histories.append(History.env_histories.pop(j))
i = i + 1
# Add left over newly created envs to History
j = 0
while j < len(envs_a):
if j in matches_a:
j = j + 1
continue
env_a = envs_a[j]
env_h = initial_env_history(commit_after, env_a)
History.env_histories.append(env_h)
j = j + 1
# Find dictionary of new tags in terms of labels
names = get_names_commit(commit_after)
new_labels = {}
for tag, label in tag_new:
if wrong_type(label, names):
continue
new_labels[label] = tag
# Get dictionary labels ---> tags
tags = find_tags(commit_after)
# We need to add tags to new_labels to make sure all new envs get correct tag
for env in envs_a:
label = env.name + '-' + env.label
if not env.label == '' and label in tags:
if label in new_labels:
if not new_labels[label] == tags[label]:
print('Warning: double tag ' + new_labels[label] + ', ' + tags[label] + ' for ' + label)
else:
new_labels[label] = tags[label]
# Create dictionary of label ---> tag where tag got removed and label did not get a new tag
# This should almost always be empty
removed_labels = {}
for tag, label in tag_del:
if wrong_type(label, names):
continue
if label in new_labels:
continue
removed_labels[label] = tag
# Add new tags or delete removed tags to histories
for env_h in History.env_histories:
env = env_h.env
label = env.name + '-' + env.label
if not env.label == '' and label in new_labels:
tag = new_labels[label]
# Already there, then done
if env.tag == tag:
if not env_h.commit == commit_after:
if debug:
print('Info: correct tag carried.')
print_one_line(env)
env_h.commit = commit_after
continue
# If there is a tag but it is not the same
if not env.tag == '':
print('Warning: changing ' + env.tag + ' to ' + tag)
if not env_h.commit == commit_after:
# Here a new step in history required
if not env_h.commit == commit_before:
print("Warning: incorrect commit in environment history.")
# set or change tag
env.tag = tag
update_env_history(env_h, commit_after, env)
else:
# Here we need to retroactively update the tag
env_h.envs[-1].tag = tag
env_h.env.tag = tag
elif not env.label == '' and label in removed_labels:
tag = removed_labels[label]
# If not there, then done
if env.tag == '':
if not env_h.commit == commit_after:
print('Error: Wrong commit on history without tag!')
print_one_line(env)
exit(1)
continue
# If there is a tag but it is not the same
if not env.tag == tag:
print('Warning: removing tag ' + env.tag + ' which should have been ' + tag)
if not env_h.commit == commit_after:
# Here a new step in history is required
if not env_h.commit == commit_before:
print("Warning: incorrect commit in environment history.")
# delete tag
env.tag = ''
update_env_history(env_h, commit_after, env)
else:
# Here we need to retroactively remove the tag
env_h.envs[-1].tag = ''
env_h.env.tag = ''
else:
# Finally update commit on environment history in other cases
env_h.commit = commit_after
# Create a dictionary whose keys are changed files and whose values
# are lists of tags pointing to those
all_tags = {}
# After this names will be the list of files changed
names = []
for name in all_envs:
names.append(name)
all_tags[name] = []
# Create the lists
for label in tags:
name = name_in_correct_type(label, names)