Skip to content

Commit

Permalink
* fix inserting duplicate entities of ImageInReply with the same ur…
Browse files Browse the repository at this point in the history
…l filename when they are shared across multiple replies, to recover the behavior before

5d794b1 @ `ReplyContentImageSaver.Save()`
@ c#/crawler
  • Loading branch information
n0099 committed Jun 14, 2024
1 parent 5bb296e commit c4abb9c
Showing 1 changed file with 10 additions and 6 deletions.
16 changes: 10 additions & 6 deletions c#/crawler/src/Tieba/Crawl/Saver/ReplyContentImageSaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,10 @@ from c in r.ContentsProtoBuf
.DistinctBy(t => (t.Pid, t.ImageInReply.UrlFilename))
.ToList();
if (replyContentImages.Count == 0) return () => { };
var images = replyContentImages.Select(t => t.ImageInReply)
.DistinctBy(image => image.UrlFilename).ToDictionary(image => image.UrlFilename);
var images = replyContentImages
.Select(replyContentImage => replyContentImage.ImageInReply)
.DistinctBy(image => image.UrlFilename)
.ToDictionary(image => image.UrlFilename);

var existingImages = (
from e in db.ImageInReplies.AsTracking()
Expand Down Expand Up @@ -94,11 +96,13 @@ on existing.UrlFilename equals newInContent.UrlFilename
select (existing, newInContent))
.ForEach(t => t.existing.ExpectedByteSize = t.newInContent.ExpectedByteSize);

(from existing in existingImages.Values
(from existingOrNew in existingImages.Values
.Concat(newImages.Values)
.DistinctBy(image => image.UrlFilename) // on a duplicate UrlFilename the earliest one in order is kept, so existing images take precedence over new ones
join replyContentImage in replyContentImages
on existing.UrlFilename equals replyContentImage.ImageInReply.UrlFilename
select (existing, replyContentImage))
.ForEach(t => t.replyContentImage.ImageInReply = t.existing);
on existingOrNew.UrlFilename equals replyContentImage.ImageInReply.UrlFilename
select (existingOrNew, replyContentImage))
.ForEach(t => t.replyContentImage.ImageInReply = t.existingOrNew);
var existingReplyContentImages = db.ReplyContentImages.AsNoTracking()
.Where(replyContentImages.Aggregate(
LinqKit.PredicateBuilder.New<ReplyContentImage>(),
Expand Down

0 comments on commit c4abb9c

Please sign in to comment.