-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
+ entity class `LatestReplier` that has a one-to-one relationship with entity `ThreadPost.LatestReplier` * replace field `LatestReplierUid` with a foreign key to entity `LatestReplier` @ ThreadPost.cs, also affects `ThreadRevision`, `ThreadSaver` & `CrawlPost.CrawlReplies()` * replace field `_latestRepliers` with `_latestRepliersKeyByUnique` to reuse latest repliers with same `UniqueLatestReplier` for `FillFromRequestingWith602()` @ ThreadCrawlFacade.cs * now will invoke `ThreadLatestReplierSaver.Save()` FieldRevisionIgnorance @ `Save()` * no longer ignore revision for field `ThreadPost.LatestReplierUid` @ `FieldRevisionIgnorance()` @ ThreadSave.cs @ c#
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
namespace tbm.Crawler.Db; | ||
|
||
/// <summary>
/// Database entity describing the latest replier of a thread. It extends
/// <see cref="BaseUser"/> with its own surrogate key and an optional user id.
/// </summary>
public class LatestReplier : BaseUser | ||
{ | ||
// Primary key; declared as an identity column, so the database generates the value on insert.
[Key] [DatabaseGenerated(DatabaseGeneratedOption.Identity)] | ||
public uint Id { get; set; } | ||
Check notice on line 6 in c#/crawler/src/Db/LatestReplier.cs GitHub Actions / runs-on (macos-latest) / ReSharper
Check notice on line 6 in c#/crawler/src/Db/LatestReplier.cs GitHub Actions / runs-on (ubuntu-latest) / ReSharper
|
||
// Nullable user id of the replier.
// NOTE(review): the crawl code visible in this commit only fills Name/DisplayName - confirm where Uid gets assigned.
public long? Uid { get; set; } | ||
Check notice on line 7 in c#/crawler/src/Db/LatestReplier.cs GitHub Actions / runs-on (macos-latest) / ReSharper
Check notice on line 7 in c#/crawler/src/Db/LatestReplier.cs GitHub Actions / runs-on (macos-latest) / ReSharper
Check notice on line 7 in c#/crawler/src/Db/LatestReplier.cs GitHub Actions / runs-on (ubuntu-latest) / ReSharper
Check notice on line 7 in c#/crawler/src/Db/LatestReplier.cs GitHub Actions / runs-on (ubuntu-latest) / ReSharper
Check notice on line 7 in c#/crawler/src/Db/LatestReplier.cs GitHub Actions / runs-on (windows-latest) / ReSharper
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,7 +14,7 @@ public class ThreadCrawlFacade( | |
postParser, postSaverFactory.Invoke, | ||
userParserFactory.Invoke, userSaverFactory.Invoke) | ||
{ | ||
private readonly Dictionary<Uid, User> _latestRepliers = []; | ||
private readonly Dictionary<ThreadLatestReplierSaver.UniqueLatestReplier, LatestReplier?> _latestRepliersKeyByUnique = []; | ||
|
||
public delegate ThreadCrawlFacade New(Fid fid, string forumName); | ||
|
||
|
@@ -44,8 +44,19 @@ join parsed in Posts.Values on (Tid)inResponse.Tid equals parsed.Tid | |
{ // replace with more detailed location.name in the 6.0.2 response | ||
t.parsed.Geolocation = Helper.SerializedProtoBufOrNullIfEmpty(t.inResponse.Location); | ||
} | ||
var name = t.inResponse.LastReplyer.Name.NullIfEmpty(); | ||
var nameShow = t.inResponse.LastReplyer.NameShow.NullIfEmpty(); | ||
// LastReplyer will be null when LivePostType != "", but LastTimeInt will have expected timestamp value | ||
t.parsed.LatestReplierUid = t.inResponse.LastReplyer?.Uid; | ||
var latestReplierEntity = t.inResponse.LastReplyer == null ? null : new LatestReplier() | ||
Check failure on line 51 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs GitHub Actions / runs-on (macos-latest) / build (crawler)
Check failure on line 51 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs GitHub Actions / runs-on (macos-latest) / build (crawler)
Check failure on line 51 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs GitHub Actions / runs-on (ubuntu-latest) / build (crawler)
Check failure on line 51 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs GitHub Actions / runs-on (ubuntu-latest) / build (crawler)
Check failure on line 51 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs GitHub Actions / runs-on (windows-latest) / build (crawler)
|
||
{ | ||
Name = name, | ||
DisplayName = name == nameShow ? null : nameShow | ||
Check failure on line 54 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs GitHub Actions / runs-on (macos-latest) / build (crawler)
Check failure on line 54 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs GitHub Actions / runs-on (macos-latest) / build (crawler)
Check failure on line 54 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs GitHub Actions / runs-on (ubuntu-latest) / build (crawler)
Check failure on line 54 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs GitHub Actions / runs-on (ubuntu-latest) / build (crawler)
Check failure on line 54 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs GitHub Actions / runs-on (windows-latest) / build (crawler)
|
||
}; | ||
var uniqueLatestReplier = ThreadLatestReplierSaver.UniqueLatestReplier.FromLatestReplier(latestReplierEntity); | ||
t.parsed.LatestReplier = _latestRepliersKeyByUnique.TryGetValue(uniqueLatestReplier, out var existingLatestReplier) | ||
Check failure on line 58 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs GitHub Actions / runs-on (macos-latest) / build (crawler)
Check failure on line 58 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs GitHub Actions / runs-on (macos-latest) / build (crawler)
Check failure on line 58 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs GitHub Actions / runs-on (ubuntu-latest) / build (crawler)
Check failure on line 58 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs GitHub Actions / runs-on (ubuntu-latest) / build (crawler)
Check failure on line 58 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs GitHub Actions / runs-on (windows-latest) / build (crawler)
Check failure on line 58 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs GitHub Actions / runs-on (windows-latest) / build (crawler)
|
||
? existingLatestReplier | ||
: _latestRepliersKeyByUnique[uniqueLatestReplier] = latestReplierEntity; | ||
Check failure on line 60 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs GitHub Actions / runs-on (macos-latest) / build (crawler)
Check failure on line 60 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs GitHub Actions / runs-on (macos-latest) / build (crawler)
Check failure on line 60 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs GitHub Actions / runs-on (ubuntu-latest) / build (crawler)
Check failure on line 60 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs GitHub Actions / runs-on (ubuntu-latest) / build (crawler)
Check failure on line 60 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs GitHub Actions / runs-on (windows-latest) / build (crawler)
Check failure on line 60 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs GitHub Actions / runs-on (windows-latest) / build (crawler)
|
||
}); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
namespace tbm.Crawler.Tieba.Crawl.Saver; | ||
|
||
/// <summary>
/// Reconciles the <c>LatestReplier</c> of crawled threads with rows already stored in
/// <c>db.LatestRepliers</c>, and acquires saver locks for the repliers that are not stored yet.
/// </summary>
/// <param name="saverLocksFactory">Factory invoked with the shared set of locked keys to build the <c>SaverLocks</c> instance.</param>
public class ThreadLatestReplierSaver(SaverLocks<ThreadLatestReplierSaver.UniqueLatestReplier>.New saverLocksFactory) | ||
{ | ||
// Static, hence one set shared by every instance of this saver; it is handed to the locks factory below.
private static readonly HashSet<UniqueLatestReplier> GlobalLockedLatestRepliers = []; | ||
// The SaverLocks instance is only created on first access of .Value.
private readonly Lazy<SaverLocks<UniqueLatestReplier>> _saverLocks = | ||
new(() => saverLocksFactory(GlobalLockedLatestRepliers)); | ||
|
||
/// <summary>
/// Points each thread at the already persisted <see cref="LatestReplier"/> that has the same
/// (Name, DisplayName) pair, then acquires locks for the pairs that were not found in the database.
/// </summary>
/// <param name="db">Context whose <c>LatestRepliers</c> set is queried (without change tracking).</param>
/// <param name="threads">Threads whose <c>LatestReplier</c> may be replaced by an existing row.</param>
/// <returns>The <c>Dispose</c> method of the saver locks, as a delegate the caller can invoke later.</returns>
public Action Save(CrawlerDbContext db, IReadOnlyCollection<ThreadPost> threads) | ||
{ | ||
// Keys of all threads that carry a replier. The list is not de-duplicated, so a replier
// shared by several threads contributes one OR-branch per thread to the predicate below.
var uniqueLatestRepliers = threads | ||
.Where(th => th.LatestReplier != null) | ||
.Select(UniqueLatestReplier.FromThread).ToList(); | ||
// Builds a single predicate of the form (Name == a AND DisplayName == b) OR (...) OR ...
// and materializes the matching rows with one query.
var existingLatestRepliers = db.LatestRepliers.AsNoTracking() | ||
.Where(uniqueLatestRepliers.Aggregate( | ||
LinqKit.PredicateBuilder.New<LatestReplier>(), | ||
(predicate, newOrExisting) => | ||
predicate.Or(LinqKit.PredicateBuilder | ||
.New<LatestReplier>(existing => | ||
existing.Name == newOrExisting.Name) | ||
.And(existing => | ||
existing.DisplayName == newOrExisting.DisplayName)))) | ||
.ToList(); | ||
// Rebind every thread whose key equals a stored row's key to that stored entity.
// NOTE(review): the join runs over all threads, including those with a null LatestReplier,
// whose key is (null, null) - confirm that matching a stored row with both names null is intended.
(from existing in existingLatestRepliers | ||
join thread in threads | ||
on UniqueLatestReplier.FromLatestReplier(existing) equals UniqueLatestReplier.FromThread(thread) | ||
select (existing, thread)) | ||
.ForEach(t => t.thread.LatestReplier = t.existing); | ||
|
||
// Only keys without a stored row are passed to Acquire; Except also removes duplicate keys.
// The value returned by Acquire is discarded.
// NOTE(review): presumably Acquire reports which keys were actually locked - verify that ignoring it is safe.
_ = _saverLocks.Value.Acquire(uniqueLatestRepliers | ||
.Except(existingLatestRepliers.Select(UniqueLatestReplier.FromLatestReplier)) | ||
.ToList()); | ||
return _saverLocks.Value.Dispose; | ||
} | ||
|
||
/// <summary>
/// Key identifying a latest replier by its name pair. Being a record, it compares by value,
/// which is what the join, <c>Except</c> and the hash set above rely on.
/// </summary>
public record UniqueLatestReplier(string? Name, string? DisplayName) | ||
{ | ||
/// <summary>Builds the key from an entity; a null entity yields a key with both parts null.</summary>
public static UniqueLatestReplier FromLatestReplier(LatestReplier? latestReplier) => | ||
new(latestReplier?.Name, latestReplier?.DisplayName); | ||
|
||
/// <summary>Builds the key from the thread's <c>LatestReplier</c>, which may be null.</summary>
public static UniqueLatestReplier FromThread(ThreadPost thread) => | ||
FromLatestReplier(thread.LatestReplier); | ||
} | ||
} |