This repository has been archived by the owner on Sep 13, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathleaguepatchnotes
1542 lines (1277 loc) · 53.5 KB
/
leaguepatchnotes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
import sys
import re
import json
import logging
from bs4 import BeautifulSoup
logger = logging.getLogger("patchnotesparser")
def soup_match(elem, selector):
    """Tell whether a soup element satisfies a simplified selector string

    The selector is scanned for `name`, `#name` and `.name` tokens (compared
    to the element's tag name, "id" attribute and "class" list respectively).
    Every token found must match; characters outside tokens are ignored.
    """
    # deliberately naive tokenizer, good enough for the selectors used here
    for kind, name in re.findall(r"([.#]?)([a-zA-Z0-9_-]+)", selector):
        if kind == "#":
            matched = elem.get("id") == name
        elif kind == ".":
            matched = name in elem.get("class", [])
        else:
            matched = elem.name == name
        if not matched:
            return False
    return True
def soup_has_string(elem):
    """Return True if at least one direct child of the element is a string"""
    for child in elem.children:
        if isinstance(child, str):
            return True
    return False
def soup_extract_labels(elem):
    """Pull the leading labels out of an element

    As long as the element has a direct <span> child, look at the node
    right after the element start: it is a label when its "class" list is
    exactly its own text. Labels are removed from the element and their
    texts are returned as a list, in document order.
    """
    found = []
    while elem.select_one("> span"):
        candidate = elem.next
        text = candidate.string
        if text is None or candidate.get("class") != [text]:
            # first non-label node: nothing more to collect
            break
        found.append(text)
        candidate.extract()
    return found
def soup_pretty_text(elem):
    """Return the element's text, whitespace-collapsed, with <br> as newlines

    Note: the element is modified, its <br> tags are replaced by a marker.
    """
    # <br/> tags become a marker that survives whitespace collapsing
    marker = "<{br}>"
    for br in elem.find_all("br"):
        br.replace_with(marker)
    collapsed = re.sub(r"[\n\t ]+", " ", elem.get_text())
    return collapsed.replace(marker, "\n").strip()
def extract_youtube_id(url):
    """Return the video ID embedded in a YouTube URL

    Recognized forms are `//www.youtube.com/embed/<id>`,
    `//www.youtube.com/watch?v=<id>` and `//youtu.be/<id>`; the ID stops at
    the first `?` or `&`.

    Raise ValueError if the URL has none of these forms.
    """
    # dots are escaped so that only the real host names are accepted
    m = re.search(r"//(?:www\.youtube\.com/(?:embed/|watch\?v=)|youtu\.be/)([^?&]+)", url)
    if m is None:
        raise ValueError(f"not a recognized YouTube URL: {url!r}")
    return m.group(1)
class PatchHeader:
    """
    Generic representation of top-level section

    Built from a header element containing an <h2> and from the block
    elements that belong to the section.
    """

    def __init__(self, header, divs):
        heading = header.select_one("h2")
        self.id = heading.get("id")
        self.title = heading.text
        logger.debug(f"parse header-primary {self.title!r}")
        # one PatchHeaderBlock per block element, in order
        self.blocks = list(map(PatchHeaderBlock, divs))

    def serialize(self):
        """Return a JSON-friendly dict of the section and its blocks"""
        blocks = [block.serialize() for block in self.blocks]
        return {"id": self.id, "title": self.title, "blocks": blocks}
class PatchBlockBase:
    """
    Base block parser with common helpers

    Holds the fields shared by all blocks (id, title, link, image, labels,
    summary, context) and the helpers used to fill them from soup elements.
    """

    def __init__(self):
        self.id = None
        self.title = None
        self.title_link = None
        self.title_image = None  # may be a list
        self.labels = None
        self.summary = None
        self.context = None

    def serialize(self):
        """Return the common fields as a JSON-friendly dict"""
        d = {}
        d["id"] = self.id
        d["title"] = self.title
        d["titleLink"] = self.title_link
        d["titleImage"] = self.title_image
        d["labels"] = self.labels
        d["summary"] = self.summary
        d["context"] = self.context
        return d

    def _set_or_same(self, attr, value):
        """Set attribute value, fail if it overrides a different one, ignore if None"""
        if value is None:
            return
        current = getattr(self, attr, None)
        if current is not None:
            # already set: only an identical value is tolerated
            assert current == value, f"new mismatch for '{attr}'"
            return
        setattr(self, attr, value)

    def parse_title(self, title):
        """Fill id, title_image, labels, title_link and title from a title element

        Leading <img> nodes are consumed first (one image is stored as a
        string, several as a list), then leading labels. The single node
        left is the title: either a string or an <a> whose "href" is stored
        as title_link. Note that `labels` is replaced, not merged.
        """
        assert len(title), "empty title"
        self._set_or_same("id", title.get("id"))
        # handle cases when there are multiple images
        sources = []
        while True:
            head = title.next
            if head.name != "img":
                break
            sources.append(head["src"])
            head.extract()
        if sources:
            self._set_or_same("title_image", sources[0] if len(sources) == 1 else sources)
        self.labels = soup_extract_labels(title)
        assert len(title) == 1, "multiple elements in block title"
        node = title.next
        if node.name != "a":
            assert isinstance(node, str)
            self._set_or_same("title", node.strip())
            return
        self._set_or_same("title_link", node.get("href"))
        self._set_or_same("title", node.string.strip())

    def try_summary(self, elem):
        """Store the summary and return True if the element is a <p class="summary">"""
        if not soup_match(elem, "p.summary"):
            return False
        self.summary = soup_pretty_text(elem)
        return True

    def try_context(self, elem):
        """Store the context and return True if the element is a <blockquote class="context">"""
        if not soup_match(elem, "blockquote.context"):
            return False
        self.context = soup_pretty_text(elem)
        return True
class PatchHeaderBlock(PatchBlockBase):
    """
    Generic representation of a white block from patch notes

    After parsing, `changes` holds the recognized change groups
    (PatchChangeDetails) and `contents` the trailing elements that were
    not interpreted.
    """

    def __init__(self, block=None):
        super().__init__()
        self.changes = []
        self.contents = []  # unparsed "body" elements
        if block:
            self.parse(block)

    def serialize(self):
        """Return the common fields plus the serialized change groups"""
        d = super().serialize()
        d["changes"] = [o.serialize() for o in self.changes]
        return d

    def parse(self, block):
        """Parse the children of a block element, in document order

        Expected order: optional reference link, optional labels, optional
        <h3> title, optional summary, optional context, optional divider,
        then change groups. Whatever remains is stored in `contents`.
        Running out of children at any point simply ends the parsing.
        """
        assert len(block), "empty block"
        children = block.children
        try:
            # look for "header" elements, in a given order
            elem = next(children)
            if soup_match(elem, ".reference-link"):
                self.parse_reference_link(elem)
                elem = next(children)
            # handle labels (not in an element, cannot use soup_extract_labels())
            # also, they have an extra "float-left" attribute
            self.labels = soup_extract_labels(elem)
            while elem.name == "span":
                if elem.string is None or elem.get("class") != [elem.string, "float-left"]:
                    break  # stop at first non-label element
                self.labels.append(elem.string)
                elem = next(children)
            if soup_match(elem, "h3"):
                # note: usually, class="change-title"
                # NOTE(review): parse_title() reassigns self.labels, so labels
                # collected above are dropped when a title follows -- confirm
                # this is intended
                self.parse_title(elem)
                elem = next(children)
            if self.try_summary(elem):
                elem = next(children)
            if self.try_context(elem):
                elem = next(children)

            def skip_divider():
                """Advance past the current element if it is a divider"""
                nonlocal elem
                if soup_match(elem, "hr.divider"):
                    elem = next(children)
                    return True
                return False

            skip_divider()
            # look for changes (right after known headers)
            # title is optional
            while soup_match(elem, "h4.change-detail-title") or soup_match(elem, "div.attribute-change"):
                if elem.name == "h4":
                    title, elem = elem, next(children)
                else:
                    title = None
                # stop at next divider
                change_elems = []
                try:
                    # some minor blocks don't add a divider between all elements
                    while not (soup_match(elem, "hr.divider") or soup_match(elem, "h4.change-detail-title")):
                        change_elems.append(elem)
                        elem = next(children)
                    skip_divider()
                finally:
                    # runs even when children are exhausted (StopIteration),
                    # so the last group is always recorded
                    assert change_elems, "change-detail-title without elements"
                    self.changes.append(PatchChangeDetails(title, change_elems))
            # don't forget to add the current element
            self.contents = [elem]
            self.contents.extend(children)
        except StopIteration:
            pass

    def parse_reference_link(self, elem):
        """Read the title link (<a>) or the title image (<div> with a single <img>)"""
        if elem.name == "a":
            self._set_or_same("title_link", elem.get("href"))
        else:
            # the message is an f-string so that the element name is reported
            assert elem.name == "div", f"unexpected .reference-link element: {elem.name}"
            assert len(elem) == 1
            img = elem.select_one("> img")
            assert img, "expected <img> reflink"
            self._set_or_same("title_image", img["src"])
class PatchChangeDetails(PatchBlockBase):
    """
    Representation of a change block from patch notes

    A change block is an optional title element followed by body elements:
    optional summary, optional context, then attribute changes.
    """

    def __init__(self, title, elems):
        super().__init__()
        self.is_ability = None  # stays None when there is no title
        self.changes = []
        if title is not None:
            self.parse_title(title)
            self.is_ability = "ability-title" in title["class"]
        self.parse_body(elems)

    def serialize(self):
        """Return the common fields plus "is_ability" and the serialized changes"""
        d = super().serialize()
        d["is_ability"] = self.is_ability
        d["changes"] = [o.serialize() for o in self.changes]
        return d

    def parse_body(self, elems):
        """Parse the body elements of the change block

        Raise AssertionError on an unexpected element or if no attribute
        change was found.
        """
        children = iter(elems)
        try:
            # look for "header" elements, in a given order
            elem = next(children)
            if self.try_summary(elem):
                elem = next(children)
            if self.try_context(elem):
                elem = next(children)
            while soup_match(elem, "div.attribute-change"):
                self.changes.append(PatchAttributeChange(elem))
                elem = next(children)
            # reached only if an element is left over; raised explicitly
            # because `assert False` is removed under `python -O`
            raise AssertionError(f"unexpected element in change details: {elem.name}")
        except StopIteration:
            pass
        assert self.changes, "empty change details"
class PatchAttributeChange:
    """
    Single attribute change (usually "title: before => change")
    """

    def __init__(self, div):
        self.title = None
        self.labels = None
        self.before = None  # removed or before change
        self.after = None
        self.parse(div)

    def serialize(self):
        """Return a JSON-friendly dict; "labels" is only output when non-empty"""
        d = {
            "title": self.title,
            "before": self.before,
            "after": self.after,
        }
        if self.labels:
            d["labels"] = self.labels
        return d

    def parse(self, div):
        """Fill the fields from the <span> children of an attribute change

        Every child must be a <span> with a single class. The first one
        ("attribute") gives the labels and title; the others give `before`
        and/or `after` depending on the configuration found.

        Raise AssertionError on an unexpected configuration.
        """
        spans = []  # ("class", span)
        for elem in div.children:
            assert elem.name == "span"
            assert len(elem.get("class", [])) == 1
            spans.append((elem["class"][0], elem))
        assert len(spans) >= 2, "unexpected span count in attribute change"
        # first span: attribute name (title)
        name, span = spans[0]
        assert name == "attribute"
        self.labels = soup_extract_labels(span)
        if len(span) == 0:
            # no title (may happen when there is a label)
            self.title = ""
        else:
            assert len(span) == 1 and span.string is not None
            self.title = span.string
        # multiple configurations are possible, guess it from second span
        first_name = spans[1][0]
        if len(spans) == 2 and first_name == "attribute-after":
            # 'after' only
            self.after = soup_pretty_text(spans[1][1])
        elif len(spans) == 2 and first_name == "attribute-removed":
            # 'before' only
            self.before = soup_pretty_text(spans[1][1])
        elif len(spans) == 5 and first_name == "attribute-after":
            # 'prefix before => after'
            # Prepend 'prefix' to 'before' and 'after'.
            #TODO Tag only one part of the whole change as before/after;
            # this would require to guess the part of "after" that matches "before".
            _, prefix, before, change, after = spans
            prefix_text = soup_pretty_text(prefix[1]) + " "
            assert before[0] == "attribute-before"
            self.before = prefix_text + soup_pretty_text(before[1])
            assert change[0] == "change-indicator"
            assert after[0] == "attribute-after"
            self.after = prefix_text + soup_pretty_text(after[1])
        elif len(spans) == 4 and first_name == "attribute-before":
            # 'before => after'
            _, before, change, after = spans
            self.before = soup_pretty_text(before[1])
            assert change[0] == "change-indicator"
            assert after[0] == "attribute-after"
            self.after = soup_pretty_text(after[1])
        else:
            # raised explicitly: `assert False` is removed under `python -O`
            raise AssertionError(f"unexpected spans configuration: {', '.join(s[0] for s in spans)}")
class ElementChangeBase:
    """
    Base change for a whole element (champion, item, "other", ...).

    Used for final serialization (not intermediate patch parsing).
    Unset fields are not output.
    """

    def __init__(self):
        self.summary = None
        self.context = None
        self.labels = []
        self.mid_patch_update = False

    def serialize(self):
        """Return a dict holding only the fields with a truthy value"""
        candidates = (
            ("summary", self.summary),
            ("context", self.context),
            ("labels", self.labels),
            ("midPatchUpdate", self.mid_patch_update),
        )
        return {key: value for key, value in candidates if value}
class BasicChangeGroup:
    """Change group without superfluous properties"""

    def __init__(self, change: PatchChangeDetails = None):
        self.title = None
        # empty list (not None) so that serialize() works on an unparsed group
        self.changes = []
        if change:
            self.parse_change_details(change)

    def serialize(self):
        """Return the title and the serialized changes"""
        return {
            "title": self.title,
            "changes": [o.serialize() for o in self.changes],
        }

    def parse_change_details(self, change):
        """Copy title and changes from a PatchChangeDetails

        The change must be a plain one: not an ability and without link,
        image, labels, summary or context (AssertionError otherwise).
        """
        self.title = change.title
        self.changes = change.changes
        assert not change.is_ability
        assert not change.title_link
        assert not change.title_image
        assert not change.labels
        assert not change.summary
        assert not change.context
class ChampionChange(ElementChangeBase):
    """Changes of a single champion: ability changes and uncategorized ones"""

    def __init__(self, block: PatchHeaderBlock = None):
        super().__init__()
        self.name = None
        self.abilities = []
        self.others = []  # uncategorized changes
        if block:
            self.parse_block(block)

    def serialize(self):
        """Return the base fields plus name, abilities and others"""
        d = super().serialize()
        d["name"] = self.name
        d["abilities"] = [o.serialize() for o in self.abilities]
        d["others"] = [o.serialize() for o in self.others]
        return d

    def parse_block(self, block: PatchHeaderBlock):
        """Fill the change from a block dedicated to one champion

        The champion name comes from the block's title image. Change groups
        flagged as ability go to `abilities`, the others to `others`.
        """
        self.name = self.name_from_image_url(block.title_image)
        self.summary = block.summary
        self.context = block.context
        assert not block.labels, "unexpected labels on champion (not handled yet)"
        for change in block.changes:
            if change.is_ability:
                self.abilities.append(ChampionAbilityChange(change))
            else:
                self.others.append(BasicChangeGroup(change))
        assert not block.contents, f"unparsed elements in champion block of '{self.name}'"

    @staticmethod
    def name_from_image_url(url):
        """Return the champion name found in a ddragon champion image URL"""
        assert url is not None
        # dots are escaped so that only the real host name is accepted
        m = re.search(r"//ddragon\.leagueoflegends\.com/cdn/[^/]+/img/champion/([^.?]+)\.png", url)
        assert m is not None, f"unexpected champion image URL: {url!r}"
        return m.group(1)

    @classmethod
    def split_block(cls, block: PatchHeaderBlock, mid_patch_update=False):
        """Handle a single block with changes for multiple champions

        Copy the summary and context to each individual champions.
        Handle groups with multiple images.

        This is a generator: one change is yielded per image of each group,
        and the checks only run once iteration starts.
        """
        assert not block.title_image
        assert not block.title_link
        assert not block.contents
        for group in block.changes:
            images = group.title_image
            assert images, "cannot split: change group without image"
            if isinstance(images, str):
                images = [images]
            for image in images:
                change = cls()
                change.name = cls.name_from_image_url(image)
                # summary/context come from the block or from the group, never both
                if block.summary:
                    assert not group.summary, "cannot split: multiple summaries provided"
                    change.summary = block.summary
                else:
                    change.summary = group.summary
                if block.context:
                    assert not group.context, "cannot split: multiple contexts provided"
                    change.context = block.context
                else:
                    change.context = group.context
                #XXX identify ability changes based on title
                change.others = group.changes
                change.mid_patch_update = mid_patch_update
                yield change
class ChampionNewOrReworked:
    """New or reworked champion"""

    def __init__(self, block: PatchHeaderBlock = None):
        self.name = None
        self.reveal_link = None
        self.spotlight_youtube_id = None
        self.trailer_youtube_id = None
        if block:
            self.parse_block(block)

    def serialize(self):
        """Return a JSON-friendly dict, flagged with "newOrReworked": True"""
        return {
            "name": self.name,
            "newOrReworked": True,
            "revealLink": self.reveal_link,
            "spotlightYoutubeId": self.spotlight_youtube_id,
            "trailerYoutubeId": self.trailer_youtube_id,
        }

    def parse_block(self, block: PatchHeaderBlock):
        """Fill the fields from the block of a new or reworked champion

        The first unparsed element of the block must be a <ul> whose items
        each hold a single link, identified by its text.

        Raise AssertionError on an unexpected structure or link text.
        """
        self.name = ChampionChange.name_from_image_url(block.title_image)
        # assume:
        # - a context with a small announcement (skipped)
        # - a list of links
        # - an "available on League Display" block (skipped)
        assert not block.labels
        assert not block.changes
        assert not block.summary
        assert block.contents and block.contents[0].name == "ul"
        for elem in block.contents[0]:
            assert elem.name == "li"
            assert len(elem) == 1
            link = elem.select_one("> a")["href"]
            text = elem.string
            if text == "Champion Reveal":
                self.reveal_link = link
            elif text == "Champion Spotlight":
                self.spotlight_youtube_id = extract_youtube_id(link)
            elif text == "Champion Trailer" or text == "Champion Teaser":
                self.trailer_youtube_id = extract_youtube_id(link)
            else:
                # raised explicitly: `assert False` is removed under `python -O`
                raise AssertionError(f"unexpected new/reworked champion link: {text!r}")
class ChampionAbilityChange:
    """Changes of a single champion ability"""

    def __init__(self, change: PatchChangeDetails = None):
        self.key = None  # Q, W, E, R, P (passive)
        self.name = None  # pretty name
        self.summary = None
        self.context = None
        self.labels = None
        self.changes = []
        if change:
            self.parse_change(change)

    def serialize(self):
        """Return key, name and changes, plus summary/context when set

        Note: `labels` is stored but not part of the output.
        """
        d = {
            "key": self.key,
            "name": self.name,
            "changes": [o.serialize() for o in self.changes],
        }
        if self.summary:
            d["summary"] = self.summary
        if self.context:
            d["context"] = self.context
        return d

    def parse_change(self, change: PatchChangeDetails):
        """Fill the fields from an ability change group

        Raise AssertionError if the group is not an ability or if its title
        is not of the form "<key> - <name>".
        """
        assert change.is_ability
        parsed = self.parse_ability_name(change.title)
        if parsed is None:
            # fail with a clear message instead of a tuple-unpacking TypeError
            raise AssertionError(f"unexpected ability title: {change.title!r}")
        self.key, self.name = parsed
        self.summary = change.summary
        self.context = change.context
        self.labels = change.labels
        self.changes = change.changes

    @staticmethod
    def parse_ability_name(title):
        """Split "<key> - <name>" into (key, name), return None if it does not match

        The key is one of Q, W, E, R or Passive; "Passive" is returned as "P".
        """
        m = re.match(r"^(Q|W|E|R|Passive) - (.*)$", title)
        if not m:
            return None
        key, name = m.groups()
        if key == "Passive":
            key = "P"
        return key, name
class ItemChange(ElementChangeBase):
    """Changes of a single item, identified by its numeric ID"""

    def __init__(self, block: PatchHeaderBlock = None):
        super().__init__()
        self.id = None
        self.changes = []
        if block:
            self.parse_block(block)

    def serialize(self):
        """Return the base fields plus id and changes"""
        d = super().serialize()
        d["id"] = self.id
        d["changes"] = [o.serialize() for o in self.changes]
        return d

    def parse_block(self, block: PatchHeaderBlock):
        """Fill the change from a block dedicated to one item

        The item ID comes from the block's title image.
        """
        self.id = self.id_from_image_url(block.title_image)
        self.summary = block.summary
        self.context = block.context
        self.labels = block.labels
        for change in block.changes:
            self.changes.append(BasicChangeGroup(change))
        assert not block.contents, f"unparsed elements in item block of {block.title!r}"

    @classmethod
    def split_block(cls, block: PatchHeaderBlock, mid_patch_update=False):
        """Handle a single block with changes for multiple items

        Copy the summary and context to each individual items.
        Handle groups with multiple images.

        This is a generator: one change is yielded per image of each group,
        and the checks only run once iteration starts.
        """
        assert not block.title_image
        assert not block.title_link
        assert not block.contents
        for group in block.changes:
            images = group.title_image
            assert images, "cannot split: change group without image"
            if isinstance(images, str):
                images = [images]
            for image in images:
                change = cls()
                change.id = cls.id_from_image_url(image)
                # summary/context come from the block or from the group, never both
                if block.summary:
                    assert not group.summary, "cannot split: multiple summaries provided"
                    change.summary = block.summary
                else:
                    change.summary = group.summary
                if block.context:
                    assert not group.context, "cannot split: multiple contexts provided"
                    change.context = block.context
                else:
                    change.context = group.context
                change.changes = group.changes
                change.mid_patch_update = mid_patch_update
                yield change

    @staticmethod
    def id_from_image_url(url):
        """Return the item ID (as an int) found in a ddragon item image URL"""
        assert url is not None
        # dots are escaped so that only the real host name is accepted
        m = re.search(r"//ddragon\.leagueoflegends\.com/cdn/[^/]+/img/item/(\d+)\.png", url)
        assert m is not None, f"unexpected item image URL: {url!r}"
        return int(m.group(1))
class SummonerSpellChange(ElementChangeBase):
    """Changes of a single summoner spell"""

    def __init__(self, block: PatchHeaderBlock = None):
        super().__init__()
        self.name = None
        self.changes = []
        if block:
            self.parse_block(block)

    def serialize(self):
        """Return the base fields plus name and changes"""
        d = super().serialize()
        d["name"] = self.name
        d["changes"] = [o.serialize() for o in self.changes]
        return d

    def parse_block(self, block: PatchHeaderBlock):
        """Fill the change from a block dedicated to one summoner spell

        The spell name comes from the block's title image. At most one
        change group is accepted and it must be a plain one.
        """
        self.name = self.name_from_image_url(block.title_image)
        self.summary = block.summary
        self.context = block.context
        self.labels = block.labels
        # assume changes are not in groups
        assert len(block.changes) <= 1
        if block.changes:
            change = block.changes[0]
            assert not change.title_link
            assert not change.title_image
            assert not change.labels
            assert not change.summary
            assert not change.context
            self.changes = change.changes
        assert not block.contents, f"unparsed elements in summoner spell block of '{self.name}'"

    @staticmethod
    def name_from_image_url(url):
        """Return the spell name found in a spell image URL

        ddragon URLs are tried first (an optional "Summoner" prefix is
        dropped), then article image URLs, for which only "Heal" is known.
        """
        assert url is not None
        # dots are escaped so that only the real host name is accepted
        m = re.search(r"//ddragon\.leagueoflegends\.com/cdn/[^/]+/img/spell/(?:Summoner)?([^/.?&]+)\.png", url)
        if m:
            return m.group(1)
        m = re.search(r"/public/images/articles/[^/]+/[^/]+/[^/]+/(Heal)\.(?:jpg|png)", url)
        assert m is not None, f"unexpected summoner spell image URL: {url!r}"
        return m.group(1)
class RuneChange(ElementChangeBase):
    """Changes of a single rune"""

    def __init__(self, block: PatchHeaderBlock = None):
        super().__init__()
        self.name = None
        self.changes = []
        if block:
            self.parse_block(block)

    def serialize(self):
        """Return the base fields plus name and changes"""
        d = super().serialize()
        d["name"] = self.name
        d["changes"] = [o.serialize() for o in self.changes]
        return d

    def parse_block(self, block: PatchHeaderBlock):
        """Fill the change from a block dedicated to one rune

        The rune name comes from the block's title image. At most one
        change group is accepted and it must be a plain one.
        """
        self.name = self.name_from_image_url(block.title_image)
        self.summary = block.summary
        self.context = block.context
        self.labels = block.labels
        # assume changes are not in groups
        assert len(block.changes) <= 1
        if block.changes:
            change = block.changes[0]
            assert not change.title_link
            assert not change.title_image
            assert not change.labels
            assert not change.summary
            assert not change.context
            self.changes = change.changes
        assert not block.contents, f"unparsed elements in rune block of '{self.name}'"

    @staticmethod
    def name_from_image_url(url):
        """Return the rune name found in a rune image URL

        ddragon perk image URLs are tried first (the name is the directory
        holding the image), then article image URLs (file name, without an
        optional "_Rune" suffix).
        """
        assert url is not None
        # dots are escaped so that only the real host name is accepted
        m = re.search(r"//ddragon\.leagueoflegends\.com/cdn/img/perk-images/Styles/[^/]+/([^/]+)/[^.?]+\.png", url)
        if m:
            return m.group(1)
        m = re.search(r"/public/images/articles/[^/]+/[^/]+/[^/]+/([^.?&_]+)(?:_Rune)?\.(?:jpg|png)", url)
        assert m is not None, f"unexpected rune image URL: {url!r}"
        return m.group(1)
class OtherChange(ElementChangeBase):
    """Generic titled change, built from a PatchHeader or a PatchHeaderBlock

    With `nested`, each change group of the block becomes a child
    OtherChange instead of a flat list of attribute changes.
    """

    def __init__(self, elem=None, nested=False):
        super().__init__()
        self.title = None
        self.changes = []
        self.nested = nested
        if elem is None:
            return
        if isinstance(elem, PatchHeader):
            self.parse_header(elem, nested)
        elif isinstance(elem, PatchHeaderBlock):
            parse = self.parse_header_block_nested if nested else self.parse_header_block
            parse(elem)
        else:
            raise TypeError(elem)

    def serialize(self):
        """Return the base fields plus title and changes ("nested" only when set)"""
        d = super().serialize()
        d["title"] = self.title
        d["changes"] = [o.serialize() for o in self.changes]
        if self.nested:
            d["nested"] = self.nested
        return d

    def parse_header(self, header: PatchHeader, nested=False):
        """Fill the change from a header holding exactly one block"""
        assert header.title
        self.title = header.title
        assert len(header.blocks) == 1
        (block,) = header.blocks
        # Usually, "other" changes are a single block. Add a special case
        # for nested ones and require to be explicit about it.
        parse = self.parse_header_block_nested if nested else self.parse_header_block
        parse(block)

    def parse_header_block(self, block: PatchHeaderBlock):
        """Fill the change from a block with at most one plain change group"""
        if block.title:
            assert self.title is None
            self.title = block.title
        for attr in ("title_link", "title_image", "labels"):
            assert not getattr(block, attr)
        self.summary, self.context = block.summary, block.context
        # assume changes are not in groups
        # (use "nested" flag otherwise)
        assert len(block.changes) <= 1
        if block.changes:
            group = block.changes[0]
            for attr in ("title_link", "title_image", "labels", "summary", "context"):
                assert not getattr(group, attr)
            self.changes = group.changes
        assert not block.contents, f"unparsed elements in 'other' block of '{self.title}'"

    def parse_header_block_nested(self, block: PatchHeaderBlock):
        """Fill the change from a block, one child OtherChange per change group"""
        self.nested = True
        if block.title:
            assert self.title is None
            self.title = block.title
        for attr in ("title_link", "title_image", "labels"):
            assert not getattr(block, attr)
        self.summary, self.context = block.summary, block.context
        for group in block.changes:
            assert not group.title_link
            assert not group.title_image
            child = OtherChange()
            child.title = group.title
            child.summary = group.summary
            child.context = group.context
            child.labels = group.labels
            child.changes = group.changes
            self.changes.append(child)
        assert not block.contents, f"unparsed elements in 'other' block of '{self.title}'"
patch_notes_parsers = {}  # {version: parser_class}


class PatchNotesParserMeta(type):
    """Meta class to register version-specific parsers

    A class defining a non-None `parser_patch_version` in its own body is
    recorded in `patch_notes_parsers`; a version can be registered only once.
    """

    def __new__(mcs, name, bases, fields):
        cls = type.__new__(mcs, name, bases, fields)
        version = fields.get("parser_patch_version")
        if version is None:
            # generic class: nothing to register
            return cls
        assert version not in patch_notes_parsers, f"multiple PatchNotesParser for version {version}"
        patch_notes_parsers[version] = cls
        return cls
class PatchNotesParser(object, metaclass=PatchNotesParserMeta):
"""
Parse League of Legends patch notes into a generic structure
Parsing is lenient but will inform about any suspicious unparsed elements.
"""
# Define in subclasses for patch version specific handling
parser_patch_version = None
# List of new and reworked champions (ddragon's name)
# Note: only the links will be preserved by the default handling.
parser_new_or_reworked_champions = []
# List of header IDs to put in "others" (see dispatch_header())
parser_others_headers = []
# List of header IDs to ignore (see skipped in dispatch_header())
parser_removed_headers = []
# List of block IDs to remove (see remove_block_by_title_id())
parser_removed_blocks = []
def __init__(self, soup):
    """Parse the given soup; the soup is modified by the normalization step"""
    self.version = self.get_version_from_soup(soup)
    self.summary = self.highlights_youtube_id = None
    self.champions, self.items = [], []
    self.summoner_spells, self.runes = [], []
    self.bugfixes = []
    self.others = []  # uncategorized headers
    self.normalize_soup(soup)
    self.parse_container(soup.select_one("#patch-notes-container"))
def serialize(self):
return {
"version": self.version,
"summary": self.summary,
"highlightsYoutubeId": self.highlights_youtube_id,
"champions": [o.serialize() for o in self.champions],
"items": [o.serialize() for o in self.items],
"summonerSpells": [o.serialize() for o in self.summoner_spells],
"runes": [o.serialize() for o in self.runes],
"bugfixes": self.bugfixes,
"others": [o.serialize() for o in self.others],
}
@staticmethod
def get_soup(filename):
    """Open a UTF-8 HTML file and return its soup (html.parser)"""
    with open(filename, "r", encoding="utf-8") as handle:
        soup = BeautifulSoup(handle, "html.parser")
    return soup
@staticmethod
def get_version_from_soup(soup):
title = soup.select_one("title").text.strip()
m = re.match(r"^Patch (\d+\.\d+) Notes", title, re.I)
return m.group(1)
@classmethod
def normalize_soup(cls, soup):
    """Modify the soup to fix parsing problems

    - remove every whitespace-only string
    - wrap into a <div> the first <iframe> that is a direct child of a
      div.white-stone, itself in a div.content-border right after a primary header
    - remove the blocks listed in `parser_removed_blocks`

    Raise AssertionError if a block to remove cannot be found.
    """
    # remove all empty strings (after stripping)
    for e in soup.find_all(string=True):
        if not e.strip():
            e.extract()
    container = soup.select_one("#patch-notes-container")
    # fix highlight's <iframe> not in subdivs (happen in some patches)
    elem = container.select_one("header.header-primary + div.content-border > div.white-stone > iframe")
    if elem:
        logger.debug("wrap highlight's <iframe>")
        elem.wrap(soup.new_tag("div"))
    # remove unwanted blocks
    # note: headers are not removed here because their blocks need to be grouped first
    for title_id in cls.parser_removed_blocks:
        # do the removal outside of the assert: assert statements are dropped
        # under `python -O`, which would silently skip the removal itself
        removed = cls.remove_block_by_title_id(container, title_id)
        assert removed, f"failed to remove block {title_id!r}"
@staticmethod
def remove_block_by_title_id(soup, title_id):
    """Remove a section block, based on the ID of its <h3> title

    The block is the great-grandparent of the <h3> and must be a
    div.content-border. Return True if a block was removed, False if no
    <h3> has this ID.
    """
    # don't use select_one(), some IDs contain ','
    heading = soup.find("h3", id=title_id)
    if not heading:
        return False
    block = heading.parent.parent.parent
    assert soup_match(block, "div.content-border")
    block.extract()
    return True
@classmethod
def from_file(cls, filename):
    """Load the soup of the given file and parse it, return a PatchNotesParser"""
    return cls.from_soup(cls.get_soup(filename))
@classmethod
def from_soup(cls, soup):
    """Parse patch notes from a soup, return a PatchNotesParser

    The parser registered for the soup's version is used if there is one,
    the class this method is called on otherwise.

    Note: parsing is destructive. The soup will be modified.
    """
    version = cls.get_version_from_soup(soup)
    specific = patch_notes_parsers.get(version)
    if specific is not None:
        logger.info(f"parsing patch notes for version {version} with specific parser")
        return specific(soup)
    logger.info(f"parsing patch notes for version {version} with default parser")
    return cls(soup)
def parse_container(self, container):
"""Parse the top-level container"""
assert not soup_has_string(container), "unexpected string in top-level container"
self.summary = soup_pretty_text(container.select_one("> blockquote.context"))
# filter useless top-level container elements
# note: some patches have multiple <h2 id="patch-top"></h2>
children = []
for elem in container.children:
if elem.name == "br":
continue
if elem.name == "p" and not elem.contents:
continue # empty <p>
if soup_match(elem, "h2#patch-top"):
assert not elem.contents, "h2#patch-top not empty"
continue
if elem.name == "p" and elem.select("> a.btt"):
# back-to-top link
continue
children.append(elem)
assert soup_match(children[0], "blockquote.context")
assert soup_match(children[1], "div.context-designers")