Skip to content

Commit

Permalink
+ variable replyContentsKeyByPid to prevent re-parsing encoded prot…
Browse files Browse the repository at this point in the history
…oBuf of `Content` in the param `writingEntityEntriesAction` of `TransformEntityWorker.Transform()`, which has already been parsed and mutated just before in the param `writingEntityMutator`

* reduce the default value of `saveWritingEntitiesBatchSize` from 10k to 1k to reduce the overhead of executing a long SQL prepared statement with 20k params
* extract the value being passed as the param `saveWritingEntitiesBatchSize` of `TransformEntityWorker.Transform()` into a configurable setting
@ `ProcessImagesInAllReplyContentsWorker.DoWork()`
@ crawler

* update NuGet package `SonarAnalyzer.CSharp` @ shared
@ c#
  • Loading branch information
n0099 committed Jun 12, 2024
1 parent a53b6c6 commit 2537cad
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 18 deletions.
3 changes: 3 additions & 0 deletions c#/crawler/appsettings.json
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@
}
}
},
"ProcessImagesInAllReplyContents": {
"SaveWritingEntitiesBatchSize": 1000
},
"Logging": {
"LogLevel": {
"Default": "Trace",
Expand Down
38 changes: 21 additions & 17 deletions c#/crawler/src/Worker/ProcessImagesInAllReplyContentsWorker.cs
Original file line number Diff line number Diff line change
@@ -1,27 +1,30 @@
using Microsoft.EntityFrameworkCore.ChangeTracking;

namespace tbm.Crawler.Worker;

public class ProcessImagesInAllReplyContentsWorker(
ILogger<ProcessImagesInAllReplyContentsWorker> logger,
IConfiguration config,
Func<Owned<CrawlerDbContext.NewDefault>> dbContextDefaultFactory,
Func<Owned<CrawlerDbContext.New>> dbContextFactory,
ReplyContentImageSaver replyContentImageSaver)
: TransformEntityWorker<CrawlerDbContext, ReplyContent, ReplyContent, Pid>(logger)
{
protected override async Task DoWork(CancellationToken stoppingToken)
{
var saveWritingEntitiesBatchSize = config
.GetSection("ProcessImagesInAllReplyContents")
.GetValue("SaveWritingEntitiesBatchSize", 1000);
var stopwatch = new Stopwatch();
stopwatch.Start();
await using var dbDefaultFactory = dbContextDefaultFactory();
var db = dbDefaultFactory.Value();
foreach (var fid in from e in db.Forums select e.Fid)
{
logger.LogInformation("Simplify images in reply contents of fid {} started", fid);
var replyContentsKeyByPid = new Dictionary<Pid, RepeatedField<Content>>(saveWritingEntitiesBatchSize);
await using var dbFactory = dbContextFactory();
await Transform(
() => dbFactory.Value(fid),
saveWritingEntitiesBatchSize: 10000,
saveWritingEntitiesBatchSize,
readingEntity => readingEntity.Pid,
readingEntity => new()
{
Expand All @@ -33,11 +36,15 @@ await Transform(
{
if (readingEntity.ProtoBufBytes == null) return;
var pid = readingEntity.Pid;
var protoBuf = PostContentWrapper.Parser.ParseFrom(readingEntity.ProtoBufBytes);
var reply = new Reply {Pid = pid, Content = {protoBuf.Value}};
var reply = new Reply
{
Pid = pid,
Content = {PostContentWrapper.Parser.ParseFrom(readingEntity.ProtoBufBytes).Value}
};
ReplyParser.SimplifyImagesInReplyContent(logger, ref reply);
var bytes = Helper.SerializedProtoBufWrapperOrNullIfEmpty(reply.Content, Helper.WrapPostContent);
writingEntity.ProtoBufBytes = bytes;
replyContentsKeyByPid.Add(pid, reply.Content);
writingEntity.ProtoBufBytes = Helper
.SerializedProtoBufWrapperOrNullIfEmpty(reply.Content, Helper.WrapPostContent);
},
(writingDb, writingEntityEntries) =>
{
Expand All @@ -46,16 +53,13 @@ await Transform(
var p = ee.Property(e => e.ProtoBufBytes);
p.IsModified = !ByteArrayEqualityComparer.Instance.Equals(p.OriginalValue, p.CurrentValue);
});
replyContentImageSaver.Save(writingDb, writingEntityEntries
.Select(ee => ee.Entity)
.Select(e => new ReplyPost
{
Pid = e.Pid,
Content = null!,
ContentsProtoBuf = e.ProtoBufBytes == null
? new()
: PostContentWrapper.Parser.ParseFrom(e.ProtoBufBytes).Value
}))();
replyContentImageSaver.Save(writingDb, replyContentsKeyByPid.Select(pair => new ReplyPost
{
Pid = pair.Key,
Content = null!,
ContentsProtoBuf = pair.Value
}));
replyContentsKeyByPid.Clear();
},
stoppingToken);
logger.LogInformation("Simplify images in reply contents of fid {} finished after {:F2}s",
Expand Down
2 changes: 1 addition & 1 deletion c#/shared/tbm.Shared.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
<PackageReference ExcludeAssets="compile" Include="Microsoft.VisualStudio.Threading.Analyzers" Version="17.10.48" />
<PackageReference ExcludeAssets="compile" Include="Roslynator.Analyzers" Version="4.12.4" />
<PackageReference ExcludeAssets="compile" Include="SharpSource" Version="1.24.0" />
<PackageReference ExcludeAssets="compile" Include="SonarAnalyzer.CSharp" Version="9.26.0.92422" />
<PackageReference ExcludeAssets="compile" Include="SonarAnalyzer.CSharp" Version="9.27.0.93347" />
<PackageReference ExcludeAssets="compile" Include="StyleCop.Analyzers.Unstable" Version="1.2.0.556" />
</ItemGroup>
</When>
Expand Down

0 comments on commit 2537cad

Please sign in to comment.